Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression/3.3/Symbolic/SymbolicVectorRegressionProblem.cs @ 7573

Last change on this file since 7573 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 17.3 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Problems.DataAnalysis.Symbolic;

namespace HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression.Symbolic {
  /// <summary>
  /// Problem definition for symbolic regression with several target variables.
  /// It owns the grammar, the solution creator, the interpreter and the size/architecture
  /// limits as parameters, and keeps the grammar, the estimation limits and the
  /// tree operators consistent whenever the problem data or one of these parameters changes.
  /// </summary>
  [StorableClass]
  public class SymbolicVectorRegressionProblem : MultiVariateDataAnalysisProblem, IProblem {

    #region Parameter Properties
    /// <summary>Parameter holding the operator that creates new symbolic expression trees.</summary>
    public new ValueParameter<SymbolicExpressionTreeCreator> SolutionCreatorParameter {
      get { return (ValueParameter<SymbolicExpressionTreeCreator>)Parameters["SolutionCreator"]; }
    }

    /// <summary>Untyped view of <see cref="SolutionCreatorParameter"/> required by <see cref="IProblem"/>.</summary>
    IParameter IProblem.SolutionCreatorParameter {
      get { return SolutionCreatorParameter; }
    }

    /// <summary>Parameter holding the interpreter used to evaluate symbolic expression trees.</summary>
    public ValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters["SymbolicExpressionTreeInterpreter"]; }
    }

    /// <summary>Parameter holding the grammar used for symbolic regression models.</summary>
    public ValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
      get { return (ValueParameter<ISymbolicExpressionGrammar>)Parameters["FunctionTreeGrammar"]; }
    }

    /// <summary>Parameter holding the maximal length of a symbolic expression.</summary>
    public ValueParameter<IntValue> MaxExpressionLengthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionLength"]; }
    }

    /// <summary>Parameter holding the maximal depth of a symbolic expression.</summary>
    public ValueParameter<IntValue> MaxExpressionDepthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionDepth"]; }
    }

    /// <summary>Parameter holding the maximal number of automatically defined functions.</summary>
    public ValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionDefiningBranches"]; }
    }

    /// <summary>Parameter holding the maximal number of arguments of automatically defined functions.</summary>
    public ValueParameter<IntValue> MaxFunctionArgumentsParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionArguments"]; }
    }

    /// <summary>Parameter holding the upper estimation limit, one entry per checked target variable.</summary>
    public ValueParameter<DoubleArray> UpperEstimationLimitParameter {
      get { return (ValueParameter<DoubleArray>)Parameters["UpperEstimationLimit"]; }
    }

    /// <summary>Parameter holding the lower estimation limit, one entry per checked target variable.</summary>
    public ValueParameter<DoubleArray> LowerEstimationLimitParameter {
      get { return (ValueParameter<DoubleArray>)Parameters["LowerEstimationLimit"]; }
    }
    #endregion

    #region Properties
    /// <summary>Maximal length of a symbolic expression.</summary>
    public IntValue MaxExpressionLength {
      get { return MaxExpressionLengthParameter.Value; }
      set { MaxExpressionLengthParameter.Value = value; }
    }

    /// <summary>Maximal depth of a symbolic expression.</summary>
    public IntValue MaxExpressionDepth {
      get { return MaxExpressionDepthParameter.Value; }
      set { MaxExpressionDepthParameter.Value = value; }
    }

    /// <summary>Maximal number of automatically defined functions.</summary>
    public IntValue MaxFunctionDefiningBranches {
      get { return MaxFunctionDefiningBranchesParameter.Value; }
      set { MaxFunctionDefiningBranchesParameter.Value = value; }
    }

    /// <summary>Maximal number of arguments of automatically defined functions.</summary>
    public IntValue MaxFunctionArguments {
      get { return MaxFunctionArgumentsParameter.Value; }
      set { MaxFunctionArgumentsParameter.Value = value; }
    }

    /// <summary>Operator that creates new symbolic expression trees.</summary>
    public new SymbolicExpressionTreeCreator SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
      set { SolutionCreatorParameter.Value = value; }
    }

    /// <summary>Upper estimation limit, one entry per checked target variable.</summary>
    public DoubleArray UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.Value; }
      set { UpperEstimationLimitParameter.Value = value; }
    }

    /// <summary>Lower estimation limit, one entry per checked target variable.</summary>
    public DoubleArray LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.Value; }
      set { LowerEstimationLimitParameter.Value = value; }
    }

    /// <summary>Untyped view of <see cref="SolutionCreator"/> required by <see cref="IProblem"/>.</summary>
    ISolutionCreator IProblem.SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
    }

    /// <summary>Interpreter used to evaluate symbolic expression trees.</summary>
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
      set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
    }

    /// <summary>Grammar used for symbolic regression models.</summary>
    public ISymbolicExpressionGrammar FunctionTreeGrammar {
      get { return FunctionTreeGrammarParameter.Value; }
      set { FunctionTreeGrammarParameter.Value = value; }
    }

    /// <summary>All operators offered by this problem (tree operators and analyzers).</summary>
    public override IEnumerable<IOperator> Operators {
      get { return operators; }
    }

    /// <summary>The subset of <see cref="Operators"/> that are analyzers.</summary>
    public IEnumerable<IAnalyzer> Analyzers {
      get { return operators.OfType<IAnalyzer>(); }
    }

    /// <summary>First sample of the training partition; a new <see cref="IntValue"/> copy on every access.</summary>
    public IntValue TrainingSamplesStart {
      get { return new IntValue(MultiVariateDataAnalysisProblemData.TrainingSamplesStart.Value); }
    }

    /// <summary>
    /// End of the training partition: the (integer) midpoint between the start and the end of the
    /// training range defined in the problem data. The second half is used for validation.
    /// </summary>
    public IntValue TrainingSamplesEnd {
      get {
        return new IntValue((MultiVariateDataAnalysisProblemData.TrainingSamplesStart.Value +
          MultiVariateDataAnalysisProblemData.TrainingSamplesEnd.Value) / 2);
      }
    }

    /// <summary>Start of the validation partition; identical to <see cref="TrainingSamplesEnd"/>.</summary>
    public IntValue ValidationSamplesStart {
      get { return TrainingSamplesEnd; }
    }

    /// <summary>End of the validation partition: the end of the training range defined in the problem data.</summary>
    public IntValue ValidationSamplesEnd {
      get { return new IntValue(MultiVariateDataAnalysisProblemData.TrainingSamplesEnd.Value); }
    }

    /// <summary>Start of the test partition as defined in the problem data.</summary>
    public IntValue TestSamplesStart {
      get { return MultiVariateDataAnalysisProblemData.TestSamplesStart; }
    }

    /// <summary>End of the test partition as defined in the problem data.</summary>
    public IntValue TestSamplesEnd {
      get { return MultiVariateDataAnalysisProblemData.TestSamplesEnd; }
    }

    /// <summary>Factor (fixed to 10.0) by which the target range is scaled when deriving the estimation limits.</summary>
    public DoubleValue PunishmentFactor {
      get { return new DoubleValue(10.0); }
    }
    #endregion

    [Storable]
    private SymbolicVectorRegressionGrammar grammar;
    [Storable]
    private List<IOperator> operators;

    [StorableConstructor]
    protected SymbolicVectorRegressionProblem(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Cloning constructor. Clones the private state that is not reachable through the
    /// parameter collection and re-attaches the event handlers on the clone.
    /// </summary>
    protected SymbolicVectorRegressionProblem(SymbolicVectorRegressionProblem original, Cloner cloner)
      : base(original, cloner) {
      // The grammar field must be cloned explicitly; otherwise it stays null on the clone
      // and UpdateGrammar() fails as soon as the problem data or an architecture parameter changes.
      grammar = (SymbolicVectorRegressionGrammar)cloner.Clone(original.grammar);
      operators = original.operators.Select(x => (IOperator)cloner.Clone(x)).ToList();
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    /// <summary>
    /// Creates a problem with a probabilistic tree creator, a simple arithmetic interpreter,
    /// a maximal expression length of 100, a maximal depth of 10 and automatically defined
    /// functions disabled (read-only 0 for both architecture limits).
    /// </summary>
    public SymbolicVectorRegressionProblem()
      : base() {
      SymbolicExpressionTreeCreator creator = new ProbabilisticTreeCreator();
      grammar = new SymbolicVectorRegressionGrammar(MultiVariateDataAnalysisProblemData.TargetVariables.CheckedItems.Count());
      var globalGrammar = new GlobalSymbolicExpressionGrammar(grammar);
      var interpreter = new SimpleArithmeticExpressionInterpreter();
      Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
      Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>("SymbolicExpressionTreeInterpreter", "The interpreter that should be used to evaluate the symbolic expression tree.", interpreter));
      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionDepth", "Maximal depth of the symbolic expression.", new IntValue(10)));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionDefiningBranches", "Maximal number of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionArguments", "Maximal number of arguments of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
      Parameters.Add(new ValueParameter<DoubleArray>("UpperEstimationLimit", "The upper limit for the estimated values for each component."));
      Parameters.Add(new ValueParameter<DoubleArray>("LowerEstimationLimit", "The lower limit for the estimated values for each component."));
      creator.SymbolicExpressionTreeParameter.ActualName = "SymbolicVectorRegressionModel";

      ParameterizeSolutionCreator();
      UpdateGrammar();
      UpdateEstimationLimits();
      InitializeOperators();
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      // BackwardsCompatibility3.3
      #region Backwards compatible code (remove with 3.4)
      if (operators == null) InitializeOperators();
      #endregion
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicVectorRegressionProblem(this, cloner);
    }

    /// <summary>Attaches the handlers that fire when a parameter gets a new value object.</summary>
    private void RegisterParameterValueEvents() {
      MaxFunctionArgumentsParameter.ValueChanged += ArchitectureParameter_ValueChanged;
      MaxFunctionDefiningBranchesParameter.ValueChanged += ArchitectureParameter_ValueChanged;
      SolutionCreatorParameter.ValueChanged += SolutionCreatorParameter_ValueChanged;
    }

    /// <summary>Attaches the handlers that observe the current parameter values themselves.</summary>
    private void RegisterParameterEvents() {
      RegisterArchitectureValueEvents();
      RegisterSolutionCreatorEvents();
    }

    /// <summary>
    /// Observes the current values of both architecture parameters.
    /// Each handler is detached before it is attached, so calling this repeatedly
    /// never leaves more than one subscription on the same value object.
    /// </summary>
    private void RegisterArchitectureValueEvents() {
      MaxFunctionArgumentsParameter.Value.ValueChanged -= ArchitectureParameterValue_ValueChanged;
      MaxFunctionArgumentsParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged -= ArchitectureParameterValue_ValueChanged;
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
    }

    /// <summary>
    /// Observes the name of the tree parameter of the current solution creator
    /// (detach-then-attach, see <see cref="RegisterArchitectureValueEvents"/>).
    /// </summary>
    private void RegisterSolutionCreatorEvents() {
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged -= SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
    }

    #region event handling
    /// <summary>Re-synchronizes analyzers, grammar and estimation limits with changed problem data.</summary>
    protected override void OnMultiVariateDataAnalysisProblemChanged(EventArgs e) {
      base.OnMultiVariateDataAnalysisProblemChanged(e);
      // partitions should be updated
      ParameterizeAnalyzers();
      // input variables should be updated
      UpdateGrammar();
      UpdateEstimationLimits();
    }

    /// <summary>Rebuilds the grammar after one of the architecture limits changed.</summary>
    protected virtual void OnArchitectureParameterChanged(EventArgs e) {
      UpdateGrammar();
    }

    protected virtual void OnGrammarChanged(EventArgs e) { }
    protected virtual void OnOperatorsChanged(EventArgs e) { RaiseOperatorsChanged(e); }

    /// <summary>Wires up a newly assigned solution creator and propagates its tree parameter name.</summary>
    protected virtual void OnSolutionCreatorChanged(EventArgs e) {
      RegisterSolutionCreatorEvents();
      ParameterizeSolutionCreator();
      OnSolutionParameterNameChanged(e);
      RaiseSolutionCreatorChanged(e);
    }

    /// <summary>Propagates the name of the solution tree parameter to analyzers and operators.</summary>
    protected virtual void OnSolutionParameterNameChanged(EventArgs e) {
      ParameterizeAnalyzers();
      ParameterizeOperators();
    }
    #endregion

    #region event handlers
    private void SolutionCreatorParameter_ValueChanged(object sender, EventArgs e) {
      OnSolutionCreatorChanged(e);
    }
    private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
      OnSolutionParameterNameChanged(e);
    }
    private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
      // Only one of the two parameters got a new value object; the other one is already observed.
      // RegisterArchitectureValueEvents is idempotent, so the untouched value is not subscribed twice.
      RegisterArchitectureValueEvents();
      OnArchitectureParameterChanged(e);
    }
    private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
      OnArchitectureParameterChanged(e);
    }
    #endregion

    #region Helpers
    /// <summary>Adds an operator to the list returned by <see cref="Operators"/>.</summary>
    protected void AddOperator(IOperator op) {
      operators.Add(op);
    }

    /// <summary>
    /// Adjusts the grammar to the number of checked target variables and to the checked input
    /// variables, then publishes a new global grammar carrying the current architecture limits.
    /// </summary>
    private void UpdateGrammar() {
      var selectedTargetVariables = MultiVariateDataAnalysisProblemData.TargetVariables.CheckedItems;
      grammar.SetDimension(selectedTargetVariables.Count());
      foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
        varSymbol.VariableNames = MultiVariateDataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value);
      }

      var globalGrammar = new GlobalSymbolicExpressionGrammar(grammar);
      globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
      globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
      FunctionTreeGrammar = globalGrammar;
    }

    /// <summary>
    /// Recomputes the estimation limits for every checked target variable as
    /// mean +/- PunishmentFactor * (max - min) over the training partition.
    /// Both limits are 0 when the training partition is empty.
    /// </summary>
    private void UpdateEstimationLimits() {
      // Materialize once: the query is deferred and would otherwise be re-evaluated for every use.
      List<string> targetVariables = MultiVariateDataAnalysisProblemData.TargetVariables.CheckedItems.Select(x => x.Value.Value).ToList();
      // These properties allocate a new wrapper object on each access, so read them a single time.
      int trainingStart = TrainingSamplesStart.Value;
      int trainingEnd = TrainingSamplesEnd.Value;
      double punishmentFactor = PunishmentFactor.Value;

      var upperLimits = new DoubleArray(targetVariables.Count);
      var lowerLimits = new DoubleArray(targetVariables.Count);
      for (int i = 0; i < targetVariables.Count; i++) {
        if (trainingStart < trainingEnd) {
          var targetValues = MultiVariateDataAnalysisProblemData.Dataset.GetVariableValues(targetVariables[i], trainingStart, trainingEnd);
          var mean = targetValues.Average();
          var range = targetValues.Max() - targetValues.Min();
          upperLimits[i] = mean + punishmentFactor * range;
          lowerLimits[i] = mean - punishmentFactor * range;
        } else {
          upperLimits[i] = 0;
          lowerLimits[i] = 0;
        }
      }
      // Publish only completely filled arrays so that observers of the parameters never see placeholder values.
      UpperEstimationLimit = upperLimits;
      LowerEstimationLimit = lowerLimits;
    }


    /// <summary>
    /// (Re)creates the operator list from all discoverable symbolic expression tree operators
    /// plus the tree size analyzer, and parameterizes them.
    /// </summary>
    private void InitializeOperators() {
      operators = new List<IOperator>();
      operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>().OfType<IOperator>());
      operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer());
      // operators.Add(new SymbolicVectorRegressionVariableFrequencyAnalyzer());
      ParameterizeOperators();
      ParameterizeAnalyzers();
    }

    /// <summary>Points the lookup parameters of the solution creator to the parameters of this problem.</summary>
    private void ParameterizeSolutionCreator() {
      SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
      SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
      SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
      SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
    }

    /// <summary>Makes all tree analyzers look up the tree under the name used by the solution creator.</summary>
    private void ParameterizeAnalyzers() {
      foreach (ISymbolicExpressionTreeAnalyzer analyzer in Analyzers.OfType<ISymbolicExpressionTreeAnalyzer>()) {
        analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
      //foreach (var analyzer in Analyzers) {
      //  var varFreqAnalyzer = analyzer as SymbolicVectorRegressionVariableFrequencyAnalyzer;
      //  if (varFreqAnalyzer != null) {
      //    varFreqAnalyzer.ProblemDataParameter.ActualName = MultiVariateDataAnalysisProblemDataParameter.Name;
      //    varFreqAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      //  }
      //}
    }

    /// <summary>
    /// Points the lookup parameters of all tree operators (general, crossover, manipulator,
    /// architecture manipulator) to the parameters of this problem and of the solution creator.
    /// </summary>
    private void ParameterizeOperators() {
      foreach (ISymbolicExpressionTreeOperator op in Operators.OfType<ISymbolicExpressionTreeOperator>()) {
        op.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
        op.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
        op.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      }
      foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType<ISymbolicExpressionTreeCrossover>()) {
        op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
        op.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
      foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType<ISymbolicExpressionTreeManipulator>()) {
        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
      foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>()) {
        op.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
        op.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
      }
    }
    #endregion

  }
}
Note: See TracBrowser for help on using the repository browser.