Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Classification/HeuristicLab.Problems.DataAnalysis.Classification/3.3/Symbolic/SymbolicClassificationProblem.cs @ 4366

Last change on this file since 4366 was 4366, checked in by mkommend, 14 years ago

added draft version of classification (ticket #939)

File size: 17.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34using HeuristicLab.PluginInfrastructure;
35using HeuristicLab.Problems.DataAnalysis.Symbolic;
36using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
37
38namespace HeuristicLab.Problems.DataAnalysis.Classification {
39  [Item("Classification Problem", "Represents a classfication problem.")]
40  [StorableClass]
41  [Creatable("Problems")]
42  public class SymbolicClassificationProblem : SingleObjectiveClassificationProblem<ISymbolicClassificationEvaluator, ISymbolicExpressionTreeCreator> {
    // Keys under which this problem's parameters are added to (and looked up from) the Parameters collection.
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string FunctionTreeGrammarParameterName = "FunctionTreeGrammar";
    private const string MaxExpressionLengthParameterName = "MaxExpressionLength";
    private const string MaxExpressionDepthParameterName = "MaxExpressionDepth";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    // NOTE(review): the identifier is misspelled ("Branchens"); the key string itself is spelled correctly.
    private const string MaxFunctionDefiningBranchensParameterName = "MaxFunctionDefiningBranches";
    private const string MaxFunctionArgumentsParameterName = "MaxFunctionArguments";
51
52    #region properties
53    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
54      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
55      protected set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
56    }
57    public IValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
58      get { return (IValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
59    }
60
61    public ISymbolicExpressionGrammar FunctionTreeGrammar {
62      get { return (ISymbolicExpressionGrammar)FunctionTreeGrammarParameter.Value; }
63      protected set { FunctionTreeGrammarParameter.Value = value; }
64    }
65    public IValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
66      get { return (IValueParameter<ISymbolicExpressionGrammar>)Parameters[FunctionTreeGrammarParameterName]; }
67    }
68
69    public IntValue MaxExpressionLength {
70      get { return MaxExpressionLengthParameter.Value; }
71      protected set { MaxExpressionLengthParameter.Value = value; }
72    }
73    public IValueParameter<IntValue> MaxExpressionLengthParameter {
74      get { return (IValueParameter<IntValue>)Parameters[MaxExpressionLengthParameterName]; }
75    }
76
77    public IntValue MaxExpressionDepth {
78      get { return MaxExpressionDepthParameter.Value; }
79      protected set { MaxExpressionDepthParameter.Value = value; }
80    }
81    public ValueParameter<IntValue> MaxExpressionDepthParameter {
82      get { return (ValueParameter<IntValue>)Parameters[MaxExpressionDepthParameterName]; }
83    }
84
85    public DoubleValue UpperEstimationLimit {
86      get { return UpperEstimationLimitParameter.Value; }
87      protected set { UpperEstimationLimitParameter.Value = value; }
88    }
89    public IValueParameter<DoubleValue> UpperEstimationLimitParameter {
90      get { return (IValueParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
91    }
92
93    public DoubleValue LowerEstimationLimit {
94      get { return LowerEstimationLimitParameter.Value; }
95      protected set { LowerEstimationLimitParameter.Value = value; }
96    }
97    public IValueParameter<DoubleValue> LowerEstimationLimitParameter {
98      get { return (IValueParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
99    }
100
101    public IntValue MaxFunctionDefiningBranches {
102      get { return MaxFunctionDefiningBranchesParameter.Value; }
103      protected set { MaxFunctionDefiningBranchesParameter.Value = value; }
104    }
105    public IValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
106      get { return (IValueParameter<IntValue>)Parameters[MaxFunctionDefiningBranchensParameterName]; }
107    }
108
109    public IntValue MaxFunctionArguments {
110      get { return MaxFunctionArgumentsParameter.Value; }
111      protected set { MaxFunctionArgumentsParameter.Value = value; }
112    }
113    public IValueParameter<IntValue> MaxFunctionArgumentsParameter {
114      get { return (IValueParameter<IntValue>)Parameters[MaxFunctionArgumentsParameterName]; }
115    }
116
117    public DoubleValue PunishmentFactor {
118      get { return new DoubleValue(10.0); }
119    }
120    public IntValue TrainingSamplesStart { get { return new IntValue(ClassificationProblemData.TrainingSamplesStart.Value); } }
121    public IntValue TrainingSamplesEnd {
122      get { return new IntValue((ClassificationProblemData.TrainingSamplesStart.Value + ClassificationProblemData.TrainingSamplesEnd.Value) / 2); }
123    }
124    public IntValue ValidationSamplesStart { get { return TrainingSamplesEnd; } }
125    public IntValue ValidationSamplesEnd { get { return new IntValue(ClassificationProblemData.TrainingSamplesEnd.Value); } }
126    public IntValue TestSamplesStart { get { return ClassificationProblemData.TestSamplesStart; } }
127    public IntValue TestSamplesEnd { get { return ClassificationProblemData.TestSamplesEnd; } }
128    #endregion
129
    // Constructor marked for the persistence framework; it only forwards the flag to the base class
    // and performs none of the initialization done by the default constructor.
    [StorableConstructor]
    protected SymbolicClassificationProblem(bool deserializing) : base(deserializing) { }
132    public SymbolicClassificationProblem()
133      : base() {
134      Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to evaluate the symbolic expression tree."));
135      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>(FunctionTreeGrammarParameterName, "The grammar that should be used for symbolic regression models."));
136      Parameters.Add(new ValueParameter<IntValue>(MaxExpressionLengthParameterName, "Maximal length of the symbolic expression."));
137      Parameters.Add(new ValueParameter<IntValue>(MaxExpressionDepthParameterName, "Maximal depth of the symbolic expression."));
138      Parameters.Add(new ValueParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper limit for the estimated value that can be returned by the symbolic regression model."));
139      Parameters.Add(new ValueParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower limit for the estimated value that can be returned by the symbolic regression model."));
140      Parameters.Add(new ValueParameter<IntValue>(MaxFunctionDefiningBranchensParameterName, "Maximal number of automatically defined functions."));
141      Parameters.Add(new ValueParameter<IntValue>(MaxFunctionArgumentsParameterName, "Maximal number of arguments of automatically defined functions."));
142
143      SolutionCreator = new ProbabilisticTreeCreator();
144      Evaluator = new SymbolicClassifacitionMeanSquaredErrorEvaluator();
145      ParameterizeSolutionCreator();
146      Maximization = new BoolValue(false);
147      FunctionTreeGrammar = new GlobalSymbolicExpressionGrammar(new FullFunctionalExpressionGrammar());
148      SymbolicExpressionTreeInterpreter = new SimpleArithmeticExpressionInterpreter();
149      MaxExpressionLength = new IntValue(100);
150      MaxExpressionDepth = new IntValue(10);
151      MaxFunctionDefiningBranches = new IntValue(0);
152      MaxFunctionArguments = new IntValue(0);
153
154      InitializeOperators();
155      RegisterParameterEvents();
156
157      UpdateEstimationLimits();
158      ParameterizeEvaluator();
159      ParameterizeSolutionCreator();
160      ParameterizeGrammar();
161      ParameterizeOperators();
162      ParameterizeAnalyzers();
163    }
164
165
166    [StorableHook(HookType.AfterDeserialization)]
167    private void AfterDeserialization() {
168
169    }
170
171    private void RegisterParameterEvents() {
172      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
173      FunctionTreeGrammarParameter.ValueChanged += new EventHandler(FunctionTreeGrammarParameter_ValueChanged);
174
175      MaxFunctionArgumentsParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
176      MaxFunctionDefiningBranchesParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
177      MaxFunctionArgumentsParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
178      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
179    }
180
    // Re-parameterizes the new evaluator and the analyzers before passing the notification on to the base class.
    protected override void OnEvaluatorChanged() {
      ParameterizeEvaluator();
      ParameterizeAnalyzers();
      base.OnEvaluatorChanged();
    }
186
187    protected override void OnSolutionCreatorChanged() {
188      ParameterizeSolutionCreator();
189      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
190      base.OnSolutionCreatorChanged();
191    }
    // Handler for a rename of the solution creator's tree parameter: the evaluator, operators and
    // analyzers all copy that actual name, so each of them is parameterized again.
    private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, System.EventArgs e) {
      ParameterizeEvaluator();
      ParameterizeOperators();
      ParameterizeAnalyzers();
    }
197
    // Refreshes everything that reads the problem data (analyzers, grammar variable names, evaluator
    // sample range, estimation limits) and then passes the notification on to the base class.
    protected override void OnClassificationProblemDataChanged() {
      ParameterizeAnalyzers();
      ParameterizeGrammar();
      ParameterizeEvaluator();
      UpdateEstimationLimits();
      base.OnClassificationProblemDataChanged();
    }
205
206    private void FunctionTreeGrammarParameter_ValueChanged(object sender, System.EventArgs e) {
207      if (!(FunctionTreeGrammar is GlobalSymbolicExpressionGrammar))
208        FunctionTreeGrammar = new GlobalSymbolicExpressionGrammar(FunctionTreeGrammar);
209      OnGrammarChanged();
210    }
    // Overridable reaction to a grammar change; by default the grammar is parameterized again.
    protected virtual void OnGrammarChanged() {
      ParameterizeGrammar();
    }
214
215    private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
216      MaxFunctionArgumentsParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
217      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
218      OnArchitectureParameterChanged();
219    }
    // Handler for a change inside the value object of an architecture parameter.
    private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
      OnArchitectureParameterChanged();
    }
    // Overridable reaction to a changed architecture limit; by default the grammar is parameterized
    // again, which copies both limits into the global grammar.
    protected virtual void OnArchitectureParameterChanged() {
      ParameterizeGrammar();
    }
226
227    protected virtual void InitializeOperators() {
228      Operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>().OfType<IOperator>());
229      Operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer());
230      Operators.Add(new ValidationBestSymbolicClassificationSolutionAnalyzer());
231    }
232
233    #region operator parameterization
234    private void UpdateEstimationLimits() {
235      if (TrainingSamplesStart.Value < TrainingSamplesEnd.Value &&
236        ClassificationProblemData.Dataset.VariableNames.Contains(ClassificationProblemData.TargetVariable.Value)) {
237        var targetValues = ClassificationProblemData.Dataset.GetVariableValues(ClassificationProblemData.TargetVariable.Value, TrainingSamplesStart.Value, TrainingSamplesEnd.Value);
238        var mean = targetValues.Average();
239        var range = targetValues.Max() - targetValues.Min();
240        UpperEstimationLimit = new DoubleValue(mean + PunishmentFactor.Value * range);
241        LowerEstimationLimit = new DoubleValue(mean - PunishmentFactor.Value * range);
242      }
243    }
244
245    private void ParameterizeEvaluator() {
246      if (Evaluator != null) {
247        Evaluator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
248        Evaluator.RegressionProblemDataParameter.ActualName = ClassificationProblemDataParameter.Name;
249        Evaluator.SamplesStartParameter.Value = TrainingSamplesStart;
250        Evaluator.SamplesEndParameter.Value = TrainingSamplesEnd;
251      }
252    }
253
254    private void ParameterizeGrammar() {
255      List<LaggedVariable> laggedSymbols = FunctionTreeGrammar.Symbols.OfType<LaggedVariable>().ToList();
256      foreach (Symbol symbol in laggedSymbols)
257        FunctionTreeGrammar.RemoveSymbol(symbol);
258      foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
259        varSymbol.VariableNames = ClassificationProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value);
260      }
261      var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar;
262      if (globalGrammar != null) {
263        globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
264        globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
265      }
266    }
267
268    private void ParameterizeSolutionCreator() {
269      SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
270      SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
271      SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
272      SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
273      SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
274    }
275
276    private void ParameterizeOperators() {
277      foreach (ISymbolicExpressionTreeOperator op in Operators.OfType<ISymbolicExpressionTreeOperator>()) {
278        op.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
279        op.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
280        op.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
281      }
282      foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType<ISymbolicExpressionTreeCrossover>()) {
283        op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
284        op.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
285      }
286      foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType<ISymbolicExpressionTreeManipulator>()) {
287        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
288      }
289      foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>()) {
290        op.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
291        op.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
292      }
293    }
294
295    private void ParameterizeAnalyzers() {
296      foreach (ISymbolicExpressionTreeAnalyzer analyzer in Operators.OfType<ISymbolicExpressionTreeAnalyzer>()) {
297        analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
298        var bestValidationSolutionAnalyzer = analyzer as ValidationBestSymbolicClassificationSolutionAnalyzer;
299        if (bestValidationSolutionAnalyzer != null) {
300          bestValidationSolutionAnalyzer.ClassificationProblemDataParameter.ActualName = ClassificationProblemDataParameter.Name;
301          bestValidationSolutionAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
302          bestValidationSolutionAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
303          bestValidationSolutionAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
304          bestValidationSolutionAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
305          bestValidationSolutionAnalyzer.ValidationSamplesStartParameter.Value = ValidationSamplesStart;
306          bestValidationSolutionAnalyzer.ValidationSamplesEndParameter.Value = ValidationSamplesEnd;
307        }
308      }
309    }
310    #endregion
311  }
312}
Note: See TracBrowser for help on using the repository browser.