Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Classification/3.3/Symbolic/SymbolicClassificationProblem.cs @ 6284

Last change on this file since 6284 was 5809, checked in by mkommend, 14 years ago

#1418: Reintegrated branch into trunk.

File size: 19.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
32using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
33using HeuristicLab.Parameters;
34using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
35using HeuristicLab.PluginInfrastructure;
36using HeuristicLab.Problems.DataAnalysis.Classification.Symbolic.Analyzers;
37using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers;
38using HeuristicLab.Problems.DataAnalysis.Symbolic;
39using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
40
41namespace HeuristicLab.Problems.DataAnalysis.Classification {
42  [Item("Classification Problem", "Represents a classfication problem.")]
43  [StorableClass]
44  [NonDiscoverableType]
45  public sealed class SymbolicClassificationProblem : SingleObjectiveClassificationProblem<ISymbolicClassificationEvaluator, ISymbolicExpressionTreeCreator>, IStorableContent {
// Keys under which the parameters of this problem are stored in the Parameters collection.
private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
private const string FunctionTreeGrammarParameterName = "FunctionTreeGrammar";

// Keys of the parameters holding the maximal expression length and depth.
private const string MaxExpressionLengthParameterName = "MaxExpressionLength";
private const string MaxExpressionDepthParameterName = "MaxExpressionDepth";

// Keys of the parameters holding the lower and upper estimation limit.
private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";

// Keys of the parameters limiting automatically defined functions.
// The identifier "...BranchensParameterName" keeps its existing spelling because
// other members of this class refer to it by that name.
private const string MaxFunctionArgumentsParameterName = "MaxFunctionArguments";
private const string MaxFunctionDefiningBranchensParameterName = "MaxFunctionDefiningBranches";
54
55    #region properties
/// <summary>
/// Gets or sets a file name for this problem.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the member required by IStorableContent, which this class implements - confirm.
/// </remarks>
public string Filename { get; set; }
57
/// <summary>
/// Gets the interpreter stored in <see cref="SymbolicExpressionTreeInterpreterParameter"/>.
/// The setter is private and writes to the same parameter.
/// </summary>
public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
  get {
    var holder = SymbolicExpressionTreeInterpreterParameter;
    return holder.Value;
  }
  private set {
    var holder = SymbolicExpressionTreeInterpreterParameter;
    holder.Value = value;
  }
}

/// <summary>
/// Gets the parameter registered under the key "SymbolicExpressionTreeInterpreter".
/// </summary>
public IValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
  get {
    var entry = Parameters[SymbolicExpressionTreeInterpreterParameterName];
    return (IValueParameter<ISymbolicExpressionTreeInterpreter>)entry;
  }
}
65
/// <summary>
/// Gets the grammar stored in <see cref="FunctionTreeGrammarParameter"/>.
/// The setter is private and writes to the same parameter.
/// </summary>
public ISymbolicExpressionGrammar FunctionTreeGrammar {
  get {
    var holder = FunctionTreeGrammarParameter;
    return (ISymbolicExpressionGrammar)holder.Value;
  }
  private set {
    var holder = FunctionTreeGrammarParameter;
    holder.Value = value;
  }
}

/// <summary>
/// Gets the parameter registered under the key "FunctionTreeGrammar".
/// </summary>
public IValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
  get {
    var entry = Parameters[FunctionTreeGrammarParameterName];
    return (IValueParameter<ISymbolicExpressionGrammar>)entry;
  }
}
73
/// <summary>
/// Gets the value stored in <see cref="MaxExpressionLengthParameter"/>.
/// The setter is private and writes to the same parameter.
/// </summary>
public IntValue MaxExpressionLength {
  get {
    var holder = MaxExpressionLengthParameter;
    return holder.Value;
  }
  private set {
    var holder = MaxExpressionLengthParameter;
    holder.Value = value;
  }
}

/// <summary>
/// Gets the parameter registered under the key "MaxExpressionLength".
/// </summary>
public IValueParameter<IntValue> MaxExpressionLengthParameter {
  get {
    var entry = Parameters[MaxExpressionLengthParameterName];
    return (IValueParameter<IntValue>)entry;
  }
}
81
/// <summary>
/// Gets the value stored in <see cref="MaxExpressionDepthParameter"/>.
/// The setter is private and writes to the same parameter.
/// </summary>
public IntValue MaxExpressionDepth {
  get {
    var holder = MaxExpressionDepthParameter;
    return holder.Value;
  }
  private set {
    var holder = MaxExpressionDepthParameter;
    holder.Value = value;
  }
}

/// <summary>
/// Gets the parameter registered under the key "MaxExpressionDepth".
/// Unlike the sibling parameter properties, this one is typed as the concrete ValueParameter class.
/// </summary>
public ValueParameter<IntValue> MaxExpressionDepthParameter {
  get {
    var entry = Parameters[MaxExpressionDepthParameterName];
    return (ValueParameter<IntValue>)entry;
  }
}
89
/// <summary>
/// Gets the value stored in <see cref="UpperEstimationLimitParameter"/>.
/// The setter is private and writes to the same parameter.
/// </summary>
public DoubleValue UpperEstimationLimit {
  get {
    var holder = UpperEstimationLimitParameter;
    return holder.Value;
  }
  private set {
    var holder = UpperEstimationLimitParameter;
    holder.Value = value;
  }
}

/// <summary>
/// Gets the parameter registered under the key "UpperEstimationLimit".
/// </summary>
public IValueParameter<DoubleValue> UpperEstimationLimitParameter {
  get {
    var entry = Parameters[UpperEstimationLimitParameterName];
    return (IValueParameter<DoubleValue>)entry;
  }
}
97
/// <summary>
/// Gets the value stored in <see cref="LowerEstimationLimitParameter"/>.
/// The setter is private and writes to the same parameter.
/// </summary>
public DoubleValue LowerEstimationLimit {
  get {
    var holder = LowerEstimationLimitParameter;
    return holder.Value;
  }
  private set {
    var holder = LowerEstimationLimitParameter;
    holder.Value = value;
  }
}

/// <summary>
/// Gets the parameter registered under the key "LowerEstimationLimit".
/// </summary>
public IValueParameter<DoubleValue> LowerEstimationLimitParameter {
  get {
    var entry = Parameters[LowerEstimationLimitParameterName];
    return (IValueParameter<DoubleValue>)entry;
  }
}
105
/// <summary>
/// Gets the value stored in <see cref="MaxFunctionDefiningBranchesParameter"/>.
/// The setter is private and writes to the same parameter.
/// </summary>
public IntValue MaxFunctionDefiningBranches {
  get {
    var holder = MaxFunctionDefiningBranchesParameter;
    return holder.Value;
  }
  private set {
    var holder = MaxFunctionDefiningBranchesParameter;
    holder.Value = value;
  }
}

/// <summary>
/// Gets the parameter registered under the key "MaxFunctionDefiningBranches".
/// </summary>
public IValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
  get {
    var entry = Parameters[MaxFunctionDefiningBranchensParameterName];
    return (IValueParameter<IntValue>)entry;
  }
}
113
/// <summary>
/// Gets the value stored in <see cref="MaxFunctionArgumentsParameter"/>.
/// The setter is private and writes to the same parameter.
/// </summary>
public IntValue MaxFunctionArguments {
  get {
    var holder = MaxFunctionArgumentsParameter;
    return holder.Value;
  }
  private set {
    var holder = MaxFunctionArgumentsParameter;
    holder.Value = value;
  }
}

/// <summary>
/// Gets the parameter registered under the key "MaxFunctionArguments".
/// </summary>
public IValueParameter<IntValue> MaxFunctionArgumentsParameter {
  get {
    var entry = Parameters[MaxFunctionArgumentsParameterName];
    return (IValueParameter<IntValue>)entry;
  }
}
121
/// <summary>
/// Gets a newly created value of 10.0 on every access.
/// UpdateEstimationLimits multiplies the range of the target values by this factor.
/// </summary>
public DoubleValue PunishmentFactor {
  get {
    var factor = new DoubleValue(10.0);
    return factor;
  }
}

/// <summary>
/// Gets the first element of the training indices of the problem data, wrapped in a new IntValue.
/// </summary>
public IntValue TrainingSamplesStart {
  get {
    var firstTrainingIndex = ClassificationProblemData.TrainingIndizes.First();
    return new IntValue(firstTrainingIndex);
  }
}
/// <summary>
/// Gets the training index found at position
/// (int)(count * (1.0 - validation percentage) - 1) of the training indices,
/// where a negative position is replaced by 0. The result is wrapped in a new IntValue.
/// </summary>
public IntValue TrainingSamplesEnd {
  get {
    int indexCount = ClassificationProblemData.TrainingIndizes.Count();
    var trainingShare = 1.0 - ClassificationProblemData.ValidationPercentage.Value;
    int rawPosition = (int)(indexCount * trainingShare - 1);
    // A position below zero is replaced by the first element.
    int position = rawPosition < 0 ? 0 : rawPosition;
    var selectedIndex = ClassificationProblemData.TrainingIndizes.ElementAt(position);
    return new IntValue(selectedIndex);
  }
}
/// <summary>
/// Gets the same value as <see cref="TrainingSamplesEnd"/>.
/// </summary>
public IntValue ValidationSamplesStart {
  get {
    return TrainingSamplesEnd;
  }
}

/// <summary>
/// Gets the last training index of the problem data plus one, wrapped in a new IntValue.
/// </summary>
public IntValue ValidationSamplesEnd {
  get {
    var lastTrainingIndex = ClassificationProblemData.TrainingIndizes.Last();
    return new IntValue(lastTrainingIndex + 1);
  }
}

/// <summary>
/// Gets the TestSamplesStart value of the problem data.
/// </summary>
public IntValue TestSamplesStart {
  get {
    return ClassificationProblemData.TestSamplesStart;
  }
}

/// <summary>
/// Gets the TestSamplesEnd value of the problem data.
/// </summary>
public IntValue TestSamplesEnd {
  get {
    return ClassificationProblemData.TestSamplesEnd;
  }
}
137    #endregion
138
/// <summary>
/// Constructor marked with [StorableConstructor]; it only forwards the flag to the base class.
/// Event handlers are attached afterwards by the AfterDeserialization hook of this class.
/// </summary>
[StorableConstructor]
private SymbolicClassificationProblem(bool deserializing)
  : base(deserializing) {
}

/// <summary>
/// Copy constructor used by <see cref="Clone"/>. Forwards to the base class and then
/// attaches the event handlers of this class to the new instance.
/// </summary>
private SymbolicClassificationProblem(SymbolicClassificationProblem original, Cloner cloner)
  : base(original, cloner) {
  RegisterParameterEvents();
}
145
/// <summary>
/// Creates a problem with its parameters registered, default values assigned,
/// operators added, event handlers attached and all dependent objects parameterized.
/// </summary>
public SymbolicClassificationProblem() {
  // Register the parameters of this problem; values are assigned further below.
  Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>(
    SymbolicExpressionTreeInterpreterParameterName,
    "The interpreter that should be used to evaluate the symbolic expression tree."));
  Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>(
    FunctionTreeGrammarParameterName,
    "The grammar that should be used for symbolic regression models."));
  Parameters.Add(new ValueParameter<IntValue>(
    MaxExpressionLengthParameterName,
    "Maximal length of the symbolic expression."));
  Parameters.Add(new ValueParameter<IntValue>(
    MaxExpressionDepthParameterName,
    "Maximal depth of the symbolic expression."));
  Parameters.Add(new ValueParameter<DoubleValue>(
    UpperEstimationLimitParameterName,
    "The upper limit for the estimated value that can be returned by the symbolic regression model."));
  Parameters.Add(new ValueParameter<DoubleValue>(
    LowerEstimationLimitParameterName,
    "The lower limit for the estimated value that can be returned by the symbolic regression model."));
  Parameters.Add(new ValueParameter<IntValue>(
    MaxFunctionDefiningBranchensParameterName,
    "Maximal number of automatically defined functions."));
  Parameters.Add(new ValueParameter<IntValue>(
    MaxFunctionArgumentsParameterName,
    "Maximal number of arguments of automatically defined functions."));

  // Default solution creator and evaluator.
  SolutionCreator = new ProbabilisticTreeCreator();
  Evaluator = new SymbolicClassifacitionMeanSquaredErrorEvaluator();
  // NOTE(review): ParameterizeSolutionCreator is called again near the end of this
  // constructor; this early call looks redundant - confirm before removing.
  ParameterizeSolutionCreator();

  // Default values. They must be assigned before RegisterParameterEvents runs,
  // because that method dereferences the values of the two architecture parameters.
  Maximization = new BoolValue(false);
  var wrappedGrammar = new FullFunctionalExpressionGrammar();
  FunctionTreeGrammar = new GlobalSymbolicExpressionGrammar(wrappedGrammar);
  SymbolicExpressionTreeInterpreter = new SimpleArithmeticExpressionInterpreter();
  MaxExpressionLength = new IntValue(100);
  MaxExpressionDepth = new IntValue(10);
  MaxFunctionDefiningBranches = new IntValue(0);
  MaxFunctionArguments = new IntValue(0);

  InitializeOperators();
  RegisterParameterEvents();

  // Bring limits, evaluator, creator, grammar, operators and analyzers in line with the values above.
  UpdateEstimationLimits();
  ParameterizeEvaluator();
  ParameterizeSolutionCreator();
  ParameterizeGrammar();
  ParameterizeOperators();
  ParameterizeAnalyzers();
}
178
/// <summary>
/// Creates a copy of this problem through the private copy constructor.
/// </summary>
/// <param name="cloner">The cloner passed on to the copy constructor.</param>
/// <returns>The newly created problem instance.</returns>
public override IDeepCloneable Clone(Cloner cloner) {
  var copy = new SymbolicClassificationProblem(this, cloner);
  return copy;
}
182
/// <summary>
/// Hook registered for HookType.AfterDeserialization; attaches the event handlers of this class.
/// </summary>
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
  // The deserialization constructor does not attach any handlers, so it is done here.
  RegisterParameterEvents();
}
187
/// <summary>
/// Attaches the handlers of this class to the solution creator's tree parameter,
/// the grammar parameter, and to both architecture parameters and their current values.
/// </summary>
private void RegisterParameterEvents() {
  SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
  FunctionTreeGrammarParameter.ValueChanged += FunctionTreeGrammarParameter_ValueChanged;

  // Replacement of a value object is reported by the parameter itself ...
  MaxFunctionArgumentsParameter.ValueChanged += ArchitectureParameter_ValueChanged;
  MaxFunctionDefiningBranchesParameter.ValueChanged += ArchitectureParameter_ValueChanged;
  // ... while the current value objects get a separate handler.
  MaxFunctionArgumentsParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
  MaxFunctionDefiningBranchesParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
}
197
/// <summary>
/// Re-parameterizes the evaluator, the analyzers and the problem,
/// then calls the base implementation.
/// </summary>
protected override void OnEvaluatorChanged() {
  ParameterizeEvaluator();
  ParameterizeAnalyzers();
  // Takes over the Maximization flag of the evaluator.
  ParameterizeProblem();

  base.OnEvaluatorChanged();
}
204
/// <summary>
/// Parameterizes the current solution creator, subscribes to the ActualNameChanged
/// event of its tree parameter, then calls the base implementation.
/// </summary>
protected override void OnSolutionCreatorChanged() {
  ParameterizeSolutionCreator();

  var treeParameter = SolutionCreator.SymbolicExpressionTreeParameter;
  treeParameter.ActualNameChanged += SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;

  base.OnSolutionCreatorChanged();
}
/// <summary>
/// Handler for the ActualNameChanged event of the solution creator's tree parameter.
/// Re-parameterizes the evaluator, the operators and the analyzers, all of which
/// copy that actual name.
/// </summary>
private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, System.EventArgs e) {
  ParameterizeEvaluator();
  ParameterizeOperators();
  ParameterizeAnalyzers();
}
215
/// <summary>
/// Re-parameterizes the analyzers, the grammar and the evaluator, recomputes the
/// estimation limits, then calls the base implementation.
/// </summary>
protected override void OnClassificationProblemDataChanged() {
  ParameterizeAnalyzers();
  ParameterizeGrammar();
  ParameterizeEvaluator();
  UpdateEstimationLimits();

  base.OnClassificationProblemDataChanged();
}
223
/// <summary>
/// Handler for the ValueChanged event of the grammar parameter. Wraps the grammar in a
/// GlobalSymbolicExpressionGrammar unless it already is one, then calls OnGrammarChanged.
/// </summary>
private void FunctionTreeGrammarParameter_ValueChanged(object sender, System.EventArgs e) {
  var currentGrammar = FunctionTreeGrammar;
  bool isGlobalGrammar = currentGrammar is GlobalSymbolicExpressionGrammar;
  if (!isGlobalGrammar) {
    // NOTE(review): this assignment writes to the parameter this handler is subscribed to;
    // if the parameter raises ValueChanged synchronously, the handler re-enters here - confirm.
    FunctionTreeGrammar = new GlobalSymbolicExpressionGrammar(currentGrammar);
  }
  OnGrammarChanged();
}
/// <summary>
/// Called after the grammar parameter changed; re-parameterizes the grammar.
/// </summary>
private void OnGrammarChanged() {
  ParameterizeGrammar();
}
233
/// <summary>
/// Handler for the ValueChanged event of the MaxFunctionArguments and
/// MaxFunctionDefiningBranches parameters. Makes sure the current value objects of both
/// parameters report their changes, then calls OnArchitectureParameterChanged.
/// </summary>
/// <param name="sender">The event source (not used).</param>
/// <param name="e">The event arguments (not used).</param>
private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
  // This handler runs when either parameter changes, so one of the two value objects
  // usually already carries the value handler. Detaching before attaching keeps exactly
  // one subscription per value object; a plain "+=" would add a duplicate every time
  // and make the handler fire several times per change.
  var argumentsValue = MaxFunctionArgumentsParameter.Value;
  argumentsValue.ValueChanged -= ArchitectureParameterValue_ValueChanged;
  argumentsValue.ValueChanged += ArchitectureParameterValue_ValueChanged;

  var branchesValue = MaxFunctionDefiningBranchesParameter.Value;
  branchesValue.ValueChanged -= ArchitectureParameterValue_ValueChanged;
  branchesValue.ValueChanged += ArchitectureParameterValue_ValueChanged;

  OnArchitectureParameterChanged();
}
/// <summary>
/// Handler for the ValueChanged event of the value objects held by the
/// MaxFunctionArguments and MaxFunctionDefiningBranches parameters.
/// </summary>
private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
  OnArchitectureParameterChanged();
}
/// <summary>
/// Called after an architecture parameter or its value changed; re-parameterizes the
/// grammar, which copies both architecture values.
/// </summary>
private void OnArchitectureParameterChanged() {
  ParameterizeGrammar();
}
245
/// <summary>
/// Fills the Operators collection: first all ISymbolicExpressionTreeOperator instances
/// supplied by the application manager that are also IOperator, then five analyzers.
/// </summary>
private void InitializeOperators() {
  var discoveredOperators = ApplicationManager.Manager
    .GetInstances<ISymbolicExpressionTreeOperator>()
    .OfType<IOperator>();
  Operators.AddRange(discoveredOperators);

  // Analyzers, added in a fixed order.
  Operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer());
  Operators.Add(new SymbolicRegressionVariableFrequencyAnalyzer());
  Operators.Add(new SymbolicExpressionSymbolFrequencyAnalyzer());
  Operators.Add(new ValidationBestSymbolicClassificationSolutionAnalyzer());
  Operators.Add(new TrainingBestSymbolicClassificationSolutionAnalyzer());
}
254
255    #region operator parameterization
/// <summary>
/// Recomputes the upper and lower estimation limit as
/// mean +/- PunishmentFactor * (max - min) of the target variable's values
/// between TrainingSamplesStart and TrainingSamplesEnd.
/// Nothing is changed when the start is not below the end, or when the dataset
/// does not contain the target variable.
/// </summary>
private void UpdateEstimationLimits() {
  // Each of these property getters runs LINQ queries over the training indices and
  // allocates a new value object, so they are evaluated once and reused below.
  var start = TrainingSamplesStart.Value;
  var end = TrainingSamplesEnd.Value;
  if (!(start < end)) return;

  var targetVariable = ClassificationProblemData.TargetVariable.Value;
  if (!ClassificationProblemData.Dataset.VariableNames.Contains(targetVariable)) return;

  var targetValues = ClassificationProblemData.Dataset.GetVariableValues(targetVariable, start, end);
  var mean = targetValues.Average();
  var range = targetValues.Max() - targetValues.Min();

  // PunishmentFactor also creates a new object per access; read it once.
  var punishment = PunishmentFactor.Value;
  UpperEstimationLimit = new DoubleValue(mean + punishment * range);
  LowerEstimationLimit = new DoubleValue(mean - punishment * range);
}
266
/// <summary>
/// Points the evaluator at the solution creator's tree name and at the problem data
/// parameter, and assigns it the training sample range. Does nothing without an evaluator.
/// </summary>
private void ParameterizeEvaluator() {
  if (Evaluator == null) return;

  var treeName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
  Evaluator.SymbolicExpressionTreeParameter.ActualName = treeName;

  var problemDataName = ClassificationProblemDataParameter.Name;
  Evaluator.RegressionProblemDataParameter.ActualName = problemDataName;

  // Both range properties create a new value object on every access.
  Evaluator.SamplesStartParameter.Value = TrainingSamplesStart;
  Evaluator.SamplesEndParameter.Value = TrainingSamplesEnd;
}
275
/// <summary>
/// Removes all LaggedVariable symbols from the grammar, assigns the checked input
/// variable names to every Variable and VariableCondition symbol, and, if the grammar
/// is a GlobalSymbolicExpressionGrammar, copies the two architecture limits into it.
/// </summary>
private void ParameterizeGrammar() {
  // Copy the matches into a list first so that removal does not disturb the enumeration.
  List<LaggedVariable> obsoleteSymbols = FunctionTreeGrammar.Symbols.OfType<LaggedVariable>().ToList();
  for (int i = 0; i < obsoleteSymbols.Count; i++) {
    Symbol obsolete = obsoleteSymbols[i];
    FunctionTreeGrammar.RemoveSymbol(obsolete);
  }

  var variableSymbols = FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>();
  foreach (var variableSymbol in variableSymbols) {
    variableSymbol.VariableNames = ClassificationProblemData.InputVariables.CheckedItems.Select(item => item.Value.Value);
  }

  var conditionSymbols = FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.VariableCondition>();
  foreach (var conditionSymbol in conditionSymbols) {
    conditionSymbol.VariableNames = ClassificationProblemData.InputVariables.CheckedItems.Select(item => item.Value.Value);
  }

  var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar;
  if (globalGrammar == null) return;

  globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
  globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
}
292
/// <summary>
/// Sets the actual names of the solution creator's grammar, height, size and
/// architecture parameters to the names of the matching parameters of this problem.
/// </summary>
private void ParameterizeSolutionCreator() {
  var grammarName = FunctionTreeGrammarParameter.Name;
  SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = grammarName;

  var depthName = MaxExpressionDepthParameter.Name;
  SolutionCreator.MaxTreeHeightParameter.ActualName = depthName;

  var lengthName = MaxExpressionLengthParameter.Name;
  SolutionCreator.MaxTreeSizeParameter.ActualName = lengthName;

  var argumentsName = MaxFunctionArgumentsParameter.Name;
  SolutionCreator.MaxFunctionArgumentsParameter.ActualName = argumentsName;

  var branchesName = MaxFunctionDefiningBranchesParameter.Name;
  SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = branchesName;
}
300
/// <summary>
/// Wires the operators in the Operators collection to this problem: tree operators get
/// the size, height and grammar parameter names; crossovers and manipulators get the
/// solution creator's tree name; architecture manipulators get the architecture names.
/// </summary>
private void ParameterizeOperators() {
  var treeOperators = Operators.OfType<ISymbolicExpressionTreeOperator>();
  foreach (var treeOperator in treeOperators) {
    treeOperator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
    treeOperator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
    treeOperator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
  }

  var crossovers = Operators.OfType<ISymbolicExpressionTreeCrossover>();
  foreach (var crossover in crossovers) {
    // Parents and child share the tree name of the solution creator.
    crossover.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    crossover.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
  }

  var manipulators = Operators.OfType<ISymbolicExpressionTreeManipulator>();
  foreach (var manipulator in manipulators) {
    manipulator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
  }

  var architectureManipulators = Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>();
  foreach (var architectureManipulator in architectureManipulators) {
    architectureManipulator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
    architectureManipulator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
  }
}
319
/// <summary>
/// Wires every ISymbolicRegressionAnalyzer in the Operators collection to this problem.
/// All of them get the solution creator's tree name; the validation-best, training-best
/// and variable-frequency analyzers get additional type-specific settings.
/// </summary>
private void ParameterizeAnalyzers() {
  foreach (var analyzer in Operators.OfType<ISymbolicRegressionAnalyzer>()) {
    analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;

    // The three checks are independent of each other; none of them ends the iteration.
    var validationAnalyzer = analyzer as ValidationBestSymbolicClassificationSolutionAnalyzer;
    if (validationAnalyzer != null) {
      validationAnalyzer.ClassificationProblemDataParameter.ActualName = ClassificationProblemDataParameter.Name;
      validationAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
      validationAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
      validationAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
      validationAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      // Read per analyzer: both properties create a new value object on every access.
      validationAnalyzer.ValidationSamplesStartParameter.Value = ValidationSamplesStart;
      validationAnalyzer.ValidationSamplesEndParameter.Value = ValidationSamplesEnd;
    }

    var trainingAnalyzer = analyzer as TrainingBestSymbolicClassificationSolutionAnalyzer;
    if (trainingAnalyzer != null) {
      trainingAnalyzer.ProblemDataParameter.ActualName = ClassificationProblemDataParameter.Name;
      trainingAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
      trainingAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
      trainingAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
      trainingAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    }

    var frequencyAnalyzer = analyzer as SymbolicRegressionVariableFrequencyAnalyzer;
    if (frequencyAnalyzer != null) {
      frequencyAnalyzer.ProblemDataParameter.ActualName = ClassificationProblemDataParameter.Name;
    }
  }
}
347
/// <summary>
/// Copies the Maximization flag of the evaluator into this problem: updates the
/// existing Maximization value, or creates one if none is set.
/// Does nothing when no evaluator is set.
/// </summary>
private void ParameterizeProblem() {
  // ParameterizeEvaluator, called from the same OnEvaluatorChanged, tolerates a missing
  // evaluator; without this guard the line below would throw a NullReferenceException.
  if (Evaluator == null) return;

  if (Maximization != null) {
    Maximization.Value = Evaluator.Maximization;
  } else {
    Maximization = new BoolValue(Evaluator.Maximization);
  }
}
355    #endregion
356  }
357}
Note: See TracBrowser for help on using the repository browser.