source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/UpdateEstimatedValuesOperator.cs @ 12988

Last change on this file since 12988 was 12988, checked in by bburlacu, 6 years ago

#1772: Performance improvement changes

  • QueryMatch.cs: eliminate unnecessary ToList() call and expensive GetBranchLevel calls
  • Diversification: eliminated the creation of shallow copies of the individual subscopes, because that approach was either too slow (events are registered/deregistered whenever variables are added to a scope) or too leaky (clearing the scopes without also clearing their variables leaks EventHandlers)
  • Aggregated diversification statistics separately with the help of some parameters set up in the SchemaCreator and SchemaEvaluator
  • Made code in the UpdateEstimatedValuesOperator perform exactly as in the evaluator (updating quality and estimated values)
  • Removed no longer needed SchemaCleanupOperator
  • Do not evaluate intermediate vertices in the genealogy analyzer if the TrimOlderGenerations flag is activated

New functionality:

  • parameter to control the fraction of the population to be considered by the diversification strategy
  • parameter to control whether individuals may be matched by any schema and mutated only once (exclusive matching)
  • parameter to control whether linear scaling should be applied to the estimated values used for the calculation of phenotypic similarity (default: yes)
File size: 6.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Linq;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.EvolutionTracking;
29using HeuristicLab.Parameters;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Re-evaluates the symbolic expression tree of the current scope on the training partition,
  /// overwrites the scope's "Quality" value with the squared Pearson's r of the linearly scaled
  /// estimates, and stores the estimated values in the scope's "EstimatedValues" variable.
  /// </summary>
  [Item("UpdateEstimatedValuesOperator", "Put the estimated values of the tree in the scope to be used by the phenotypic similarity calculator")]
  [StorableClass]
  public class UpdateEstimatedValuesOperator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
    // Names of the lookup parameters registered by this operator.
    private const string ProblemDataParameterName = "ProblemData";
    private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string EstimationLimitsParameterName = "EstimationLimits";
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";

    // Names of the scope variables read and written by Apply().
    private const string QualityVariableName = "Quality";
    private const string EstimatedValuesVariableName = "EstimatedValues";

    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
    }
    public ILookupParameter<DoubleLimit> EstimationLimitsParameter {
      get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
    }
    public ILookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public ILookupParameter<BoolValue> ScaleEstimatedValuesParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[ScaleEstimatedValuesParameterName]; }
    }

    public UpdateEstimatedValuesOperator() {
      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
      Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName));
      Parameters.Add(new LookupParameter<BoolValue>(ScaleEstimatedValuesParameterName));
    }

    [StorableConstructor]
    protected UpdateEstimatedValuesOperator(bool deserializing) : base(deserializing) { }

    protected UpdateEstimatedValuesOperator(UpdateEstimatedValuesOperator original, Cloner cloner) : base(original, cloner) {
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new UpdateEstimatedValuesOperator(this, cloner);
    }

    /// <summary>
    /// Evaluates the tree on the training rows, updates the "Quality" value of the current scope
    /// and stores the (optionally linearly scaled) estimated values in the scope.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown when the interpreter yields a different number of values than there are target values.
    /// </exception>
    public override IOperation Apply() {
      var tree = SymbolicExpressionTreeParameter.ActualValue;
      var problemData = ProblemDataParameter.ActualValue;
      var estimationLimits = EstimationLimitsParameter.ActualValue;
      var interpreter = InterpreterParameter.ActualValue;

      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices).ToArray();
      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();

      if (estimatedValues.Length != targetValues.Length)
        throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");

      // Fit the linear scaling parameters; NaN and infinite estimates are left out of the fit.
      var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
      for (int i = 0; i < estimatedValues.Length; ++i) {
        var estimated = estimatedValues[i];
        if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
          linearScalingCalculator.Add(estimated, targetValues[i]);
      }

      double alpha = linearScalingCalculator.Alpha;
      double beta = linearScalingCalculator.Beta;
      if (linearScalingCalculator.ErrorState != OnlineCalculatorError.None) {
        // The calculator reported an error: fall back to the identity transformation.
        alpha = 0.0;
        beta = 1.0;
      }

      // The quality is always computed from the scaled values, independent of ScaleEstimatedValues.
      var scaled = estimatedValues.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper).ToArray();
      OnlineCalculatorError error;
      var r = OnlinePearsonsRCalculator.Calculate(targetValues, scaled, out error);
      if (error != OnlineCalculatorError.None) r = double.NaN;

      var variables = ExecutionContext.Scope.Variables;
      ((DoubleValue)variables[QualityVariableName].Value).Value = r * r;

      // ScaleEstimatedValues only decides which values are stored for the similarity calculation.
      // If the lookup yields no value (null), scaling is applied (the documented default) instead
      // of failing with a NullReferenceException.
      var scaleEstimatedValues = ScaleEstimatedValuesParameter.ActualValue;
      bool applyScaling = scaleEstimatedValues == null || scaleEstimatedValues.Value;
      var storedValues = applyScaling
        ? scaled
        : estimatedValues.LimitToRange(estimationLimits.Lower, estimationLimits.Upper).ToArray();

      if (variables.ContainsKey(EstimatedValuesVariableName)) {
        variables[EstimatedValuesVariableName].Value = new DoubleArray(storedValues);
      } else {
        variables.Add(new Core.Variable(EstimatedValuesVariableName, new DoubleArray(storedValues)));
      }
      return base.Apply();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.