Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.BottomUpTreeDistance/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisProblem.cs @ 11219

Last change on this file since 11219 was 11219, checked in by bburlacu, 10 years ago

#2215: Refactored the tree distance calculators as similarity calculators (extending SingleObjectiveSolutionSimilarityCalculator). Removed ISymbolicExpressionTreeDistanceCalculator interface. Made small performance enhancements to the BottomUpSimilarityCalculator. Added unit tests to check correctness and performance of bottom up similarity. Added SingleObjectivePopulationDiversityAnalyzer in the default operators list along with the BottomUpSimilarityCalculator.

File size: 21.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Drawing;
24using System.Linq;
25using HeuristicLab.Analysis;
26using HeuristicLab.Common;
27using HeuristicLab.Common.Resources;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Optimization;
32using HeuristicLab.Optimization.Operators;
33using HeuristicLab.Parameters;
34using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
35using HeuristicLab.PluginInfrastructure;
36using HeuristicLab.Problems.DataAnalysis.Symbolic.SimilarityCalculators;
37using HeuristicLab.Problems.Instances;
38
39namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
40  [StorableClass]
41  public abstract class SymbolicDataAnalysisProblem<T, U, V> : HeuristicOptimizationProblem<U, V>, IDataAnalysisProblem<T>, ISymbolicDataAnalysisProblem, IStorableContent,
42    IProblemInstanceConsumer<T>, IProblemInstanceExporter<T>
43    where T : class, IDataAnalysisProblemData
44    where U : class, ISymbolicDataAnalysisEvaluator<T>
45    where V : class, ISymbolicDataAnalysisSolutionCreator {
46
47    #region parameter names & descriptions
48    private const string ProblemDataParameterName = "ProblemData";
49    private const string SymbolicExpressionTreeGrammarParameterName = "SymbolicExpressionTreeGrammar";
50    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
51    private const string MaximumSymbolicExpressionTreeDepthParameterName = "MaximumSymbolicExpressionTreeDepth";
52    private const string MaximumSymbolicExpressionTreeLengthParameterName = "MaximumSymbolicExpressionTreeLength";
53    private const string MaximumFunctionDefinitionsParameterName = "MaximumFunctionDefinitions";
54    private const string MaximumFunctionArgumentsParameterName = "MaximumFunctionArguments";
55    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
56    private const string FitnessCalculationPartitionParameterName = "FitnessCalculationPartition";
57    private const string ValidationPartitionParameterName = "ValidationPartition";
58    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
59
60    private const string ProblemDataParameterDescription = "";
61    private const string SymbolicExpressionTreeGrammarParameterDescription = "The grammar that should be used for symbolic expression tree.";
62    private const string SymoblicExpressionTreeInterpreterParameterDescription = "The interpreter that should be used to evaluate the symbolic expression tree.";
63    private const string MaximumSymbolicExpressionTreeDepthParameterDescription = "Maximal depth of the symbolic expression. The minimum depth needed for the algorithm is 3 because two levels are reserved for the ProgramRoot and the Start symbol.";
64    private const string MaximumSymbolicExpressionTreeLengthParameterDescription = "Maximal length of the symbolic expression.";
65    private const string MaximumFunctionDefinitionsParameterDescription = "Maximal number of automatically defined functions";
66    private const string MaximumFunctionArgumentsParameterDescription = "Maximal number of arguments of automatically defined functions.";
67    private const string RelativeNumberOfEvaluatedSamplesParameterDescription = "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation.";
68    private const string FitnessCalculationPartitionParameterDescription = "The partition of the problem data training partition, that should be used to calculate the fitness of an individual.";
69    private const string ValidationPartitionParameterDescription = "The partition of the problem data training partition, that should be used to select the best model from (optional).";
70    private const string ApplyLinearScalingParameterDescription = "Flag that indicates if the individual should be linearly scaled before evaluating.";
71    #endregion
72
73    #region parameter properties
74    IParameter IDataAnalysisProblem.ProblemDataParameter {
75      get { return ProblemDataParameter; }
76    }
77    public IValueParameter<T> ProblemDataParameter {
78      get { return (IValueParameter<T>)Parameters[ProblemDataParameterName]; }
79    }
80    public IValueParameter<ISymbolicDataAnalysisGrammar> SymbolicExpressionTreeGrammarParameter {
81      get { return (IValueParameter<ISymbolicDataAnalysisGrammar>)Parameters[SymbolicExpressionTreeGrammarParameterName]; }
82    }
83    public IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
84      get { return (IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
85    }
86    public IFixedValueParameter<IntValue> MaximumSymbolicExpressionTreeDepthParameter {
87      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumSymbolicExpressionTreeDepthParameterName]; }
88    }
89    public IFixedValueParameter<IntValue> MaximumSymbolicExpressionTreeLengthParameter {
90      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumSymbolicExpressionTreeLengthParameterName]; }
91    }
92    public IFixedValueParameter<IntValue> MaximumFunctionDefinitionsParameter {
93      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumFunctionDefinitionsParameterName]; }
94    }
95    public IFixedValueParameter<IntValue> MaximumFunctionArgumentsParameter {
96      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumFunctionArgumentsParameterName]; }
97    }
98    public IFixedValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
99      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
100    }
101    public IFixedValueParameter<IntRange> FitnessCalculationPartitionParameter {
102      get { return (IFixedValueParameter<IntRange>)Parameters[FitnessCalculationPartitionParameterName]; }
103    }
104    public IFixedValueParameter<IntRange> ValidationPartitionParameter {
105      get { return (IFixedValueParameter<IntRange>)Parameters[ValidationPartitionParameterName]; }
106    }
107    public IFixedValueParameter<BoolValue> ApplyLinearScalingParameter {
108      get { return (IFixedValueParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
109    }
110    #endregion
111
112    #region properties
113    public string Filename { get; set; }
114    public static new Image StaticItemImage { get { return VSImageLibrary.Type; } }
115
116    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData {
117      get { return ProblemData; }
118    }
119    public T ProblemData {
120      get { return ProblemDataParameter.Value; }
121      set { ProblemDataParameter.Value = value; }
122    }
123
124    public ISymbolicDataAnalysisGrammar SymbolicExpressionTreeGrammar {
125      get { return SymbolicExpressionTreeGrammarParameter.Value; }
126      set { SymbolicExpressionTreeGrammarParameter.Value = value; }
127    }
128    public ISymbolicDataAnalysisExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
129      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
130      set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
131    }
132
133    public IntValue MaximumSymbolicExpressionTreeDepth {
134      get { return MaximumSymbolicExpressionTreeDepthParameter.Value; }
135    }
136    public IntValue MaximumSymbolicExpressionTreeLength {
137      get { return MaximumSymbolicExpressionTreeLengthParameter.Value; }
138    }
139    public IntValue MaximumFunctionDefinitions {
140      get { return MaximumFunctionDefinitionsParameter.Value; }
141    }
142    public IntValue MaximumFunctionArguments {
143      get { return MaximumFunctionArgumentsParameter.Value; }
144    }
145    public PercentValue RelativeNumberOfEvaluatedSamples {
146      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
147    }
148
149    public IntRange FitnessCalculationPartition {
150      get { return FitnessCalculationPartitionParameter.Value; }
151    }
152    public IntRange ValidationPartition {
153      get { return ValidationPartitionParameter.Value; }
154    }
155    public BoolValue ApplyLinearScaling {
156      get { return ApplyLinearScalingParameter.Value; }
157    }
158    #endregion
159
160    [StorableConstructor]
161    protected SymbolicDataAnalysisProblem(bool deserializing) : base(deserializing) { }
162    [StorableHook(HookType.AfterDeserialization)]
163    private void AfterDeserialization() {
164      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName)) {
165        Parameters.Add(new FixedValueParameter<BoolValue>(ApplyLinearScalingParameterName, ApplyLinearScalingParameterDescription, new BoolValue(false)));
166        ApplyLinearScalingParameter.Hidden = true;
167
168        //it is assumed that for all symbolic regression algorithms linear scaling was set to true
169        //there is no possibility to determine the previous value of the parameter as it was stored in the evaluator
170        if (GetType().Name.Contains("SymbolicRegression"))
171          ApplyLinearScaling.Value = true;
172      }
173
174      RegisterEventHandlers();
175    }
176    protected SymbolicDataAnalysisProblem(SymbolicDataAnalysisProblem<T, U, V> original, Cloner cloner)
177      : base(original, cloner) {
178      RegisterEventHandlers();
179    }
180
181    protected SymbolicDataAnalysisProblem(T problemData, U evaluator, V solutionCreator)
182      : base(evaluator, solutionCreator) {
183      Parameters.Add(new ValueParameter<T>(ProblemDataParameterName, ProblemDataParameterDescription, problemData));
184      Parameters.Add(new ValueParameter<ISymbolicDataAnalysisGrammar>(SymbolicExpressionTreeGrammarParameterName, SymbolicExpressionTreeGrammarParameterDescription));
185      Parameters.Add(new ValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, SymoblicExpressionTreeInterpreterParameterDescription));
186      Parameters.Add(new FixedValueParameter<IntValue>(MaximumSymbolicExpressionTreeDepthParameterName, MaximumSymbolicExpressionTreeDepthParameterDescription));
187      Parameters.Add(new FixedValueParameter<IntValue>(MaximumSymbolicExpressionTreeLengthParameterName, MaximumSymbolicExpressionTreeLengthParameterDescription));
188      Parameters.Add(new FixedValueParameter<IntValue>(MaximumFunctionDefinitionsParameterName, MaximumFunctionDefinitionsParameterDescription));
189      Parameters.Add(new FixedValueParameter<IntValue>(MaximumFunctionArgumentsParameterName, MaximumFunctionArgumentsParameterDescription));
190      Parameters.Add(new FixedValueParameter<IntRange>(FitnessCalculationPartitionParameterName, FitnessCalculationPartitionParameterDescription));
191      Parameters.Add(new FixedValueParameter<IntRange>(ValidationPartitionParameterName, ValidationPartitionParameterDescription));
192      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, RelativeNumberOfEvaluatedSamplesParameterDescription, new PercentValue(1)));
193      Parameters.Add(new FixedValueParameter<BoolValue>(ApplyLinearScalingParameterName, ApplyLinearScalingParameterDescription, new BoolValue(false)));
194
195      SymbolicExpressionTreeInterpreterParameter.Hidden = true;
196      MaximumFunctionArgumentsParameter.Hidden = true;
197      MaximumFunctionDefinitionsParameter.Hidden = true;
198      ApplyLinearScalingParameter.Hidden = true;
199
200      SymbolicExpressionTreeGrammar = new TypeCoherentExpressionGrammar();
201      SymbolicExpressionTreeInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();
202
203      FitnessCalculationPartition.Start = ProblemData.TrainingPartition.Start;
204      FitnessCalculationPartition.End = ProblemData.TrainingPartition.End;
205
206      InitializeOperators();
207
208      UpdateGrammar();
209      RegisterEventHandlers();
210    }
211
212    protected virtual void UpdateGrammar() {
213      SymbolicExpressionTreeGrammar.MaximumFunctionArguments = MaximumFunctionArguments.Value;
214      SymbolicExpressionTreeGrammar.MaximumFunctionDefinitions = MaximumFunctionDefinitions.Value;
215      foreach (var varSymbol in SymbolicExpressionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Variable>()) {
216        if (!varSymbol.Fixed) {
217          varSymbol.AllVariableNames = ProblemData.InputVariables.Select(x => x.Value);
218          varSymbol.VariableNames = ProblemData.AllowedInputVariables;
219        }
220      }
221      foreach (var varSymbol in SymbolicExpressionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableCondition>()) {
222        if (!varSymbol.Fixed) {
223          varSymbol.AllVariableNames = ProblemData.InputVariables.Select(x => x.Value);
224          varSymbol.VariableNames = ProblemData.AllowedInputVariables;
225        }
226      }
227    }
228
229    private void InitializeOperators() {
230      Operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>());
231      Operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicDataAnalysisExpressionCrossover<T>>());
232      Operators.Add(new SymbolicExpressionSymbolFrequencyAnalyzer());
233      Operators.Add(new SymbolicDataAnalysisVariableFrequencyAnalyzer());
234      Operators.Add(new MinAverageMaxSymbolicExpressionTreeLengthAnalyzer());
235      Operators.Add(new SymbolicExpressionTreeLengthAnalyzer());
236      Operators.Add(new SingleObjectivePopulationDiversityAnalyzer());
237      Operators.Add(new BottomUpSimilarityCalculator());
238      ParameterizeOperators();
239    }
240
241    #region events
242    private void RegisterEventHandlers() {
243      ProblemDataParameter.ValueChanged += new EventHandler(ProblemDataParameter_ValueChanged);
244      ProblemDataParameter.Value.Changed += (object sender, EventArgs e) => OnProblemDataChanged();
245
246      SymbolicExpressionTreeGrammarParameter.ValueChanged += new EventHandler(SymbolicExpressionTreeGrammarParameter_ValueChanged);
247
248      MaximumFunctionArguments.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
249      MaximumFunctionDefinitions.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
250      MaximumSymbolicExpressionTreeDepth.ValueChanged += new EventHandler(MaximumSymbolicExpressionTreeDepth_ValueChanged);
251    }
252
253    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
254      ValidationPartition.Start = 0;
255      ValidationPartition.End = 0;
256      ProblemDataParameter.Value.Changed += (object s, EventArgs args) => OnProblemDataChanged();
257      OnProblemDataChanged();
258    }
259
260    private void SymbolicExpressionTreeGrammarParameter_ValueChanged(object sender, EventArgs e) {
261      UpdateGrammar();
262    }
263
264    private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
265      UpdateGrammar();
266    }
267
268    private void MaximumSymbolicExpressionTreeDepth_ValueChanged(object sender, EventArgs e) {
269      if (MaximumSymbolicExpressionTreeDepth != null && MaximumSymbolicExpressionTreeDepth.Value < 3)
270        MaximumSymbolicExpressionTreeDepth.Value = 3;
271    }
272
273    protected override void OnSolutionCreatorChanged() {
274      base.OnSolutionCreatorChanged();
275      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
276      ParameterizeOperators();
277    }
278
279    private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
280      ParameterizeOperators();
281    }
282
283    protected override void OnEvaluatorChanged() {
284      base.OnEvaluatorChanged();
285      ParameterizeOperators();
286    }
287
288    public event EventHandler ProblemDataChanged;
289    protected virtual void OnProblemDataChanged() {
290      FitnessCalculationPartition.Start = ProblemData.TrainingPartition.Start;
291      FitnessCalculationPartition.End = ProblemData.TrainingPartition.End;
292
293      UpdateGrammar();
294      ParameterizeOperators();
295
296      var handler = ProblemDataChanged;
297      if (handler != null) handler(this, EventArgs.Empty);
298
299      OnReset();
300    }
301    #endregion
302
303    protected virtual void ParameterizeOperators() {
304      var operators = Parameters.OfType<IValueParameter>().Select(p => p.Value).OfType<IOperator>().Union(Operators).ToList();
305
306      foreach (var op in operators.OfType<ISymbolicExpressionTreeGrammarBasedOperator>()) {
307        op.SymbolicExpressionTreeGrammarParameter.ActualName = SymbolicExpressionTreeGrammarParameter.Name;
308      }
309      foreach (var op in operators.OfType<ISymbolicExpressionTreeSizeConstraintOperator>()) {
310        op.MaximumSymbolicExpressionTreeDepthParameter.ActualName = MaximumSymbolicExpressionTreeDepthParameter.Name;
311        op.MaximumSymbolicExpressionTreeLengthParameter.ActualName = MaximumSymbolicExpressionTreeLengthParameter.Name;
312      }
313      foreach (var op in operators.OfType<ISymbolicExpressionTreeArchitectureAlteringOperator>()) {
314        op.MaximumFunctionArgumentsParameter.ActualName = MaximumFunctionArgumentsParameter.Name;
315        op.MaximumFunctionDefinitionsParameter.ActualName = MaximumFunctionDefinitionsParameter.Name;
316      }
317      foreach (var op in operators.OfType<ISymbolicDataAnalysisEvaluator<T>>()) {
318        op.ProblemDataParameter.ActualName = ProblemDataParameterName;
319        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
320        op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name;
321        op.RelativeNumberOfEvaluatedSamplesParameter.ActualName = RelativeNumberOfEvaluatedSamplesParameter.Name;
322        op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name;
323      }
324      foreach (var op in operators.OfType<ISymbolicExpressionTreeCrossover>()) {
325        op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
326        op.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
327      }
328      foreach (var op in operators.OfType<ISymbolicExpressionTreeManipulator>()) {
329        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
330      }
331      foreach (var op in operators.OfType<ISymbolicExpressionTreeAnalyzer>()) {
332        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
333      }
334      foreach (var op in operators.OfType<ISymbolicDataAnalysisSingleObjectiveAnalyzer>()) {
335        op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name;
336      }
337      foreach (var op in operators.OfType<ISymbolicDataAnalysisMultiObjectiveAnalyzer>()) {
338        op.ApplyLinearScalingParameter.ActualName = ApplyLinearScalingParameter.Name;
339      }
340      foreach (var op in operators.OfType<ISymbolicDataAnalysisAnalyzer>()) {
341        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
342      }
343      foreach (var op in operators.OfType<ISymbolicDataAnalysisValidationAnalyzer<U, T>>()) {
344        op.RelativeNumberOfEvaluatedSamplesParameter.ActualName = RelativeNumberOfEvaluatedSamplesParameter.Name;
345        op.ValidationPartitionParameter.ActualName = ValidationPartitionParameter.Name;
346      }
347      foreach (var op in operators.OfType<ISymbolicDataAnalysisInterpreterOperator>()) {
348        op.SymbolicDataAnalysisTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
349      }
350      foreach (var op in operators.OfType<ISymbolicDataAnalysisExpressionCrossover<T>>()) {
351        op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name;
352        op.ProblemDataParameter.ActualName = ProblemDataParameter.Name;
353        op.EvaluationPartitionParameter.ActualName = FitnessCalculationPartitionParameter.Name;
354        op.RelativeNumberOfEvaluatedSamplesParameter.ActualName = RelativeNumberOfEvaluatedSamplesParameter.Name;
355        op.EvaluatorParameter.ActualName = EvaluatorParameter.Name;
356      }
357      foreach (var op in operators.OfType<SingleObjectiveSolutionSimilarityCalculator>()) {
358        op.QualityVariableName = "Quality";
359        op.SolutionVariableName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
360      }
361      foreach (var op in operators.OfType<SingleObjectivePopulationDiversityAnalyzer>()) {
362        op.SimilarityCalculator = operators.OfType<BottomUpSimilarityCalculator>().SingleOrDefault();
363      }
364    }
365
366    #region Import & Export
367    public virtual void Load(T data) {
368      Name = data.Name;
369      Description = data.Description;
370      ProblemData = data;
371    }
372
373    public virtual T Export() {
374      return ProblemData;
375    }
376    #endregion
377  }
378}
Note: See TracBrowser for help on using the repository browser.