source: branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs @ 14280

Last change on this file since 14280 was 14280, checked in by bburlacu, 5 years ago

#2635: Small refactor.

File size: 17.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
  /// <summary>
  /// Single-objective symbolic regression evaluator whose quality is the squared Pearson's R.
  /// When parent qualities are available, the child is evaluated incrementally and the evaluation
  /// stops as soon as the child is predicted to miss the quality required for acceptance.
  /// </summary>
  [Item("SymbolicRegressionSingleObjectiveOSGAEvaluator", "An evaluator which tries to predict when a child will not be able to fullfil offspring selection criteria, to save evaluation time.")]
  [StorableClass]
  public class SymbolicRegressionSingleObjectiveOsgaEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
    private const string RelativeParentChildQualityThresholdParameterName = "RelativeParentChildQualityThreshold";
    private const string RelativeFitnessEvaluationIntervalSizeParameterName = "RelativeFitnessEvaluationIntervalSize";
    private const string ResultCollectionParameterName = "Results";
    private const string AggregateStatisticsParameterName = "AggregateStatistics";
    private const string ActualSelectionPressureParameterName = "SelectionPressure";
    private const string UseAdaptiveQualityThresholdParameterName = "UseAdaptiveQualityThreshold";
    private const string UseFixedEvaluationIntervalsParameterName = "UseFixedEvaluationIntervals";
    private const string RejectedStatsParameterName = "RejectedStats";
    private const string TotalStatsParameterName = "TotalStats";
    private const string ComparisonFactorParameterName = "ComparisonFactor";
    private const string ParentQualitiesParameterName = "ParentQualities";

    #region parameters
    /// <summary>Switch between the fixed-interval strategy (true) and the perfect-correlation completion strategy (false).</summary>
    public IFixedValueParameter<BoolValue> UseFixedEvaluationIntervalsParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[UseFixedEvaluationIntervalsParameterName]; }
    }
    /// <summary>When true, the rejection threshold of the fixed-interval strategy is derived from the current selection pressure (if available).</summary>
    public IFixedValueParameter<BoolValue> UseAdaptiveQualityThresholdParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveQualityThresholdParameterName]; }
    }
    /// <summary>Lookup for the "SelectionPressure" value; may resolve to null.</summary>
    public ILookupParameter<DoubleValue> ActualSelectionPressureParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[ActualSelectionPressureParameterName]; }
    }
    /// <summary>Lookup for the "Results" collection.</summary>
    public ILookupParameter<ResultCollection> ResultCollectionParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
    }
    /// <summary>When true, the fixed-interval strategy evaluates every row and records prediction statistics.</summary>
    public IValueParameter<BoolValue> AggregateStatisticsParameter {
      get { return (IValueParameter<BoolValue>)Parameters[AggregateStatisticsParameterName]; }
    }
    /// <summary>Per-interval counts of predicted (row 0) and confirmed (row 1) rejections.</summary>
    public IValueParameter<IntMatrix> RejectedStatsParameter {
      get { return (IValueParameter<IntMatrix>)Parameters[RejectedStatsParameterName]; }
    }
    /// <summary>Predicted vs. actual rejected / not rejected counts. Despite its name this accessor is backed by the "TotalStats" parameter.</summary>
    public IValueParameter<IntMatrix> NotRejectedStatsParameter {
      get { return (IValueParameter<IntMatrix>)Parameters[TotalStatsParameterName]; }
    }
    /// <summary>Interpolation factor between the worse (0.0) and the better (1.0) parent quality.</summary>
    public IValueLookupParameter<DoubleValue> ComparisonFactorParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[ComparisonFactorParameterName]; }
    }
    /// <summary>Fraction of the parent quality used as rejection threshold by the fixed-interval strategy.</summary>
    public IFixedValueParameter<PercentValue> RelativeParentChildQualityThresholdParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeParentChildQualityThresholdParameterName]; }
    }
    /// <summary>Size of one evaluation interval, relative to the training partition size.</summary>
    public IFixedValueParameter<PercentValue> RelativeFitnessEvaluationIntervalSizeParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeFitnessEvaluationIntervalSizeParameterName]; }
    }
    /// <summary>Qualities looked up under the actual name "Quality"; empty when no parents are present.</summary>
    public IScopeTreeLookupParameter<DoubleValue> ParentQualitiesParameter {
      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[ParentQualitiesParameterName]; }
    }
    #endregion

    #region parameter properties
    public bool UseFixedEvaluationIntervals {
      get { return UseFixedEvaluationIntervalsParameter.Value.Value; }
      set { UseFixedEvaluationIntervalsParameter.Value.Value = value; }
    }
    public bool UseAdaptiveQualityThreshold {
      get { return UseAdaptiveQualityThresholdParameter.Value.Value; }
      set { UseAdaptiveQualityThresholdParameter.Value.Value = value; }
    }
    public double RelativeParentChildQualityThreshold {
      get { return RelativeParentChildQualityThresholdParameter.Value.Value; }
      set { RelativeParentChildQualityThresholdParameter.Value.Value = value; }
    }

    public double RelativeFitnessEvaluationIntervalSize {
      get { return RelativeFitnessEvaluationIntervalSizeParameter.Value.Value; }
      set { RelativeFitnessEvaluationIntervalSizeParameter.Value.Value = value; }
    }

    public IntMatrix RejectedStats {
      get { return RejectedStatsParameter.Value; }
      set { RejectedStatsParameter.Value = value; }
    }

    public IntMatrix TotalStats {
      get { return NotRejectedStatsParameter.Value; }
      set { NotRejectedStatsParameter.Value = value; }
    }
    #endregion

    public override bool Maximization {
      get { return true; }
    }

    public SymbolicRegressionSingleObjectiveOsgaEvaluator() {
      Parameters.Add(new ValueLookupParameter<DoubleValue>(ComparisonFactorParameterName, "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));
      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeParentChildQualityThresholdParameterName, new PercentValue(0.9)));
      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeFitnessEvaluationIntervalSizeParameterName, new PercentValue(0.1)));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(ParentQualitiesParameterName) { ActualName = "Quality" });
      Parameters.Add(new ValueParameter<IntMatrix>(RejectedStatsParameterName, new IntMatrix()));
      Parameters.Add(new ValueParameter<IntMatrix>(TotalStatsParameterName, new IntMatrix()));
      Parameters.Add(new ValueParameter<BoolValue>(AggregateStatisticsParameterName, new BoolValue(false)));
      Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
      Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
    }

    /// <summary>
    /// Adds the parameters that may be missing from a deserialized instance so that the
    /// corresponding accessors do not fail.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(ActualSelectionPressureParameterName))
        Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));

      if (!Parameters.ContainsKey(UseAdaptiveQualityThresholdParameterName))
        Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));

      if (!Parameters.ContainsKey(UseFixedEvaluationIntervalsParameterName))
        Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
    }

    [StorableConstructor]
    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(bool deserializing) : base(deserializing) { }

    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(SymbolicRegressionSingleObjectiveOsgaEvaluator original, Cloner cloner) : base(original, cloner) { }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicRegressionSingleObjectiveOsgaEvaluator(this, cloner);
    }

    /// <summary>Clears the base state and replaces both statistics matrices with empty ones.</summary>
    public override void ClearState() {
      base.ClearState();
      RejectedStats = new IntMatrix();
      TotalStats = new IntMatrix();
    }

    /// <summary>
    /// Evaluates the current tree and stores the result in the quality parameter. The early-terminating
    /// evaluation is used when parent qualities are found, the plain squared Pearson's R otherwise.
    /// </summary>
    public override IOperation InstrumentedApply() {
      var solution = SymbolicExpressionTreeParameter.ActualValue;
      IEnumerable<int> rows = GenerateRowsToEvaluate();

      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var estimationLimits = EstimationLimitsParameter.ActualValue;
      var problemData = ProblemDataParameter.ActualValue;
      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

      double quality;
      var parentQualities = ParentQualitiesParameter.ActualValue;

      // parent subscopes are not present during evaluation of the initial population
      if (parentQualities.Length > 0) {
        quality = Calculate(interpreter, solution, estimationLimits, problemData, rows);
      } else {
        quality = Calculate(interpreter, solution, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
      }
      QualityParameter.ActualValue = new DoubleValue(quality);

      return base.InstrumentedApply();
    }

    /// <summary>
    /// Calculates the squared Pearson's R between the target values and the (bounded) estimated values
    /// of <paramref name="solution"/> on the given <paramref name="rows"/>.
    /// </summary>
    /// <returns>The squared correlation, or <see cref="double.NaN"/> if the calculator reports an error state.</returns>
    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
      OnlineCalculatorError errorState;

      double r;
      if (applyLinearScaling) {
        var rCalculator = new OnlinePearsonsRCalculator();
        CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, rCalculator, problemData.Dataset.Rows);
        errorState = rCalculator.ErrorState;
        r = rCalculator.R;
      } else {
        IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
        r = OnlinePearsonsRCalculator.Calculate(targetValues, boundedEstimatedValues, out errorState);
      }
      if (errorState != OnlineCalculatorError.None) return double.NaN;
      return r * r;
    }

    /// <summary>
    /// Early-terminating evaluation: derives the reference parent quality from the parent qualities and
    /// the comparison factor and delegates to the configured strategy.
    /// </summary>
    private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows) {
      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);

      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value).ToList();
      var minQuality = parentQualities.Min();
      var maxQuality = parentQualities.Max();
      var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;

      if (UseFixedEvaluationIntervals)
        return CalculateWithFixedIntervals(estimatedValues, problemData, rows, parentQuality);
      return CalculateWithPerfectCompletion(estimatedValues, problemData, parentQuality);
    }

    /// <summary>
    /// Number of rows per evaluation interval. Clamped to at least one row: a zero interval would cause
    /// a division by zero in the fixed-interval strategy and a non-terminating loop in the other one.
    /// </summary>
    private int GetEvaluationInterval(int trainingPartitionSize) {
      var interval = (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize);
      return Math.Max(1, interval);
    }

    /// <summary>Squared R of the calculator, or NaN when the calculator is in an error state.</summary>
    private static double SquaredCorrelation(OnlinePearsonsRCalculator calculator) {
      if (calculator.ErrorState != OnlineCalculatorError.None) return double.NaN;
      var r = calculator.R;
      return r * r;
    }

    #region fixed intervals
    /// <summary>
    /// Feeds (target, estimate) pairs into an online calculator and checks the intermediate quality after
    /// every interval. Without statistics the first intermediate quality that is not above the threshold
    /// is returned immediately; with statistics all rows are evaluated and the outcome is recorded.
    /// </summary>
    private double CalculateWithFixedIntervals(IEnumerable<double> estimatedValues, IRegressionProblemData problemData, IEnumerable<int> rows, double parentQuality) {
      double threshold = parentQuality * RelativeParentChildQualityThreshold;
      if (UseAdaptiveQualityThreshold) {
        var actualSelectionPressure = ActualSelectionPressureParameter.ActualValue;
        if (actualSelectionPressure != null)
          threshold = parentQuality * (1 - actualSelectionPressure.Value / 100.0);
      }

      // targets are taken for the same rows as the estimates so that the pairs stay aligned
      // even if the evaluated rows do not start at the first row of the dataset
      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
      var trainingPartitionSize = problemData.TrainingPartition.Size;
      var interval = GetEvaluationInterval(trainingPartitionSize);
      var aggregateStatistics = AggregateStatisticsParameter.Value.Value;

      var pearsonRCalculator = new OnlinePearsonsRCalculator();
      var qualityPerInterval = new List<double>();
      var i = 0;

      using (var targetValuesEnumerator = targetValues.GetEnumerator())
      using (var e = estimatedValues.GetEnumerator()) {
        while (targetValuesEnumerator.MoveNext() && e.MoveNext()) {
          pearsonRCalculator.Add(targetValuesEnumerator.Current, e.Current);
          ++i;
          if (i % interval == 0 || i == trainingPartitionSize) {
            var intermediateQuality = SquaredCorrelation(pearsonRCalculator);
            if (aggregateStatistics) {
              qualityPerInterval.Add(intermediateQuality);
            } else if (!(intermediateQuality > threshold)) {
              // negated comparison so that NaN also leads to rejection
              return intermediateQuality;
            }
          }
        }
      }

      var actualQuality = SquaredCorrelation(pearsonRCalculator);
      if (!aggregateStatistics)
        return actualQuality;

      // determine the first interval at which the child would have been rejected
      bool predictedRejected = false;
      int rejectionInterval = 0;
      double quality = actualQuality;
      foreach (var q in qualityPerInterval) {
        if (double.IsNaN(q) || !(q > threshold)) {
          predictedRejected = true;
          quality = q;
          break;
        }
        ++rejectionInterval;
      }
      var actuallyRejected = !(actualQuality > parentQuality);

      RecordStatistics(qualityPerInterval.Count, interval, problemData.TrainingPartition.End, predictedRejected, actuallyRejected, rejectionInterval);
      return quality;
    }

    /// <summary>
    /// Lazily creates the statistics matrices and increments the counters for one evaluated child.
    /// </summary>
    private void RecordStatistics(int intervalCount, int interval, int trainingEnd, bool predictedRejected, bool actuallyRejected, int rejectionInterval) {
      if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
        RejectedStats = new IntMatrix(2, intervalCount);
        RejectedStats.RowNames = new[] { "Predicted", "Actual" };
        RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
        TotalStats = new IntMatrix(2, 2);
        TotalStats.RowNames = new[] { "Predicted", "Actual" };
        TotalStats.ColumnNames = new[] { "Rejected", "Not Rejected" };
      }
      // gather some statistics
      if (predictedRejected) {
        RejectedStats[0, rejectionInterval]++;
        TotalStats[0, 0]++;
      } else {
        TotalStats[0, 1]++;
      }
      if (actuallyRejected) {
        TotalStats[1, 0]++;
      } else {
        TotalStats[1, 1]++;
      }
      if (predictedRejected && actuallyRejected) {
        RejectedStats[1, rejectionInterval]++;
      }
    }
    #endregion

    /// <summary>
    /// Uses the actual estimated values for the first i * interval rows of the training partition and
    /// assumes the remaining rows are perfectly correlated. If the quality of the individual still falls
    /// below the parent quality it can be rejected sooner, otherwise the whole estimated series is used
    /// as i increases.
    /// </summary>
    /// <remarks>
    /// NOTE(review): targets are indexed relative to the start of the training partition, so this assumes
    /// the estimated values cover the training partition rows in order - confirm against the callers.
    /// </remarks>
    private double CalculateWithPerfectCompletion(IEnumerable<double> estimatedValues, IRegressionProblemData problemData, double parentQuality) {
      var targetValues = problemData.Dataset.GetReadOnlyDoubleValues(problemData.TargetVariable);
      var calculator = new OnlinePearsonsRCalculator();
      var trainingPartitionSize = problemData.TrainingPartition.Size;
      var interval = GetEvaluationInterval(trainingPartitionSize);
      var start = problemData.TrainingPartition.Start;
      double quality = double.NaN;
      var estimated = new List<double>(); // save estimated values in a list so we don't re-evaluate

      using (var e = estimatedValues.GetEnumerator()) {
        for (int i = 0; i < trainingPartitionSize; i += interval) {
          calculator.Reset();
          // save estimated values into the list (for caching)
          int j = i;
          int end = Math.Min(trainingPartitionSize, i + interval);
          while (j < end && e.MoveNext()) {
            estimated.Add(e.Current);
            j++;
          }
          // add (target, estimated) pairs to the calculator
          for (j = 0; j < end; ++j) {
            calculator.Add(targetValues[j + start], estimated[j]);
          }
          // add (target, target) pairs to the calculator (simulate perfect correlation on the remaining rows)
          for (; j < trainingPartitionSize; ++j) {
            var v = targetValues[j + start];
            calculator.Add(v, v);
          }
          quality = SquaredCorrelation(calculator);
          if (!(quality > parentQuality))
            break;
        }
      }
      return quality;
    }

    /// <summary>
    /// Calculates the squared Pearson's R of <paramref name="tree"/> using the interpreter, estimation
    /// limits and scaling setting resolved in the given <paramref name="context"/>.
    /// </summary>
    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
      EstimationLimitsParameter.ExecutionContext = context;
      ApplyLinearScalingParameter.ExecutionContext = context;

      try {
        var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
        var estimationLimits = EstimationLimitsParameter.ActualValue;
        var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

        return Calculate(interpreter, tree, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
      } finally {
        // always detach the context, even if the evaluation throws
        SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
        EstimationLimitsParameter.ExecutionContext = null;
        ApplyLinearScalingParameter.ExecutionContext = null;
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.