source: branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs @ 14279

Last change on this file since 14279 was 14279, checked in by bburlacu, 5 years ago

#2635: Implement an alternative way of assessing which offspring should be rejected early: the child is evaluated on part of the training data and the remaining rows are assumed to be perfectly correlated with the target. If the offspring selection criterion is not fulfilled, the child can be rejected early. Otherwise the child is evaluated on an additional slice of the training data, and so on.

File size: 17.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
34  [Item("SymbolicRegressionSingleObjectiveOSGAEvaluator", "An evaluator which tries to predict when a child will not be able to fullfil offspring selection criteria, to save evaluation time.")]
35  [StorableClass]
36  public class SymbolicRegressionSingleObjectiveOsgaEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
// Keys of the parameters that are looked up from the surrounding algorithm.
private const string ResultCollectionParameterName = "Results";
private const string ActualSelectionPressureParameterName = "SelectionPressure";

// Keys of the settings that control the early rejection of offspring.
private const string RelativeParentChildQualityThresholdParameterName = "RelativeParentChildQualityThreshold";
private const string RelativeFitnessEvaluationIntervalSizeParameterName = "RelativeFitnessEvaluationIntervalSize";
private const string UseAdaptiveQualityThresholdParameterName = "UseAdaptiveQualityThreshold";
private const string UseFixedEvaluationIntervalsParameterName = "UseFixedEvaluationIntervals";

// Key of the switch that turns the collection of rejection statistics on.
private const string AggregateStatisticsParameterName = "AggregateStatistics";
44
#region parameters
// Every accessor fetches the parameter that the default constructor registered under the
// given key and casts it to its interface type.

/// <summary>Switch between the fixed-interval strategy (true) and the perfect-correlation strategy (false) of the early-rejecting Calculate overload.</summary>
public IFixedValueParameter<BoolValue> UseFixedEvaluationIntervalsParameter {
  get { return (IFixedValueParameter<BoolValue>)Parameters[UseFixedEvaluationIntervalsParameterName]; }
}
/// <summary>When set, the rejection threshold is derived from the looked-up selection pressure instead of the relative parent/child threshold.</summary>
public IFixedValueParameter<BoolValue> UseAdaptiveQualityThresholdParameter {
  get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveQualityThresholdParameterName]; }
}
/// <summary>Lookup of the value stored under "SelectionPressure"; only read when the adaptive threshold is enabled and may resolve to null.</summary>
public ILookupParameter<DoubleValue> ActualSelectionPressureParameter {
  get { return (ILookupParameter<DoubleValue>)Parameters[ActualSelectionPressureParameterName]; }
}
/// <summary>Lookup of the result collection stored under "Results"; it is registered but not read anywhere in this class.</summary>
public ILookupParameter<ResultCollection> ResultCollectionParameter {
  get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
}
/// <summary>When set, the fixed-interval strategy evaluates all rows and records predicted versus actual rejections.</summary>
public IValueParameter<BoolValue> AggregateStatisticsParameter {
  get { return (IValueParameter<BoolValue>)Parameters[AggregateStatisticsParameterName]; }
}
/// <summary>Matrix counting, per evaluation interval, the predicted rejections (row 0) and the predicted rejections that were also actual rejections (row 1).</summary>
public IValueParameter<IntMatrix> RejectedStatsParameter {
  get { return (IValueParameter<IntMatrix>)Parameters["RejectedStats"]; }
}
/// <summary>
/// Matrix counting predicted (row 0) and actual (row 1) rejected / not rejected offspring.
/// The property name differs from the parameter key "TotalStats"; both are kept as they are
/// because they are part of the public surface and of persisted files.
/// </summary>
public IValueParameter<IntMatrix> NotRejectedStatsParameter {
  get { return (IValueParameter<IntMatrix>)Parameters["TotalStats"]; }
}
/// <summary>Interpolation factor between the worse (0.0) and the better (1.0) parent quality.</summary>
public IValueLookupParameter<DoubleValue> ComparisonFactorParameter {
  // cast to the interface type like every other accessor, so that any implementation of the interface is accepted
  get { return (IValueLookupParameter<DoubleValue>)Parameters["ComparisonFactor"]; }
}
/// <summary>Fraction of the interpolated parent quality that a child has to exceed in the fixed-interval strategy.</summary>
public IFixedValueParameter<PercentValue> RelativeParentChildQualityThresholdParameter {
  get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeParentChildQualityThresholdParameterName]; }
}
/// <summary>Size of one evaluation interval relative to the size of the training partition.</summary>
public IFixedValueParameter<PercentValue> RelativeFitnessEvaluationIntervalSizeParameter {
  get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeFitnessEvaluationIntervalSizeParameterName]; }
}
/// <summary>Scope-tree lookup of the parent qualities (registered with the actual name "Quality").</summary>
public IScopeTreeLookupParameter<DoubleValue> ParentQualitiesParameter {
  get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters["ParentQualities"]; }
}
#endregion
78
#region parameter properties
// Convenience wrappers that read and write the values held by the parameters above.

/// <summary>Gets or sets the value of <see cref="UseFixedEvaluationIntervalsParameter"/>.</summary>
public bool UseFixedEvaluationIntervals {
  get {
    return UseFixedEvaluationIntervalsParameter.Value.Value;
  }
  set {
    UseFixedEvaluationIntervalsParameter.Value.Value = value;
  }
}

/// <summary>Gets or sets the value of <see cref="UseAdaptiveQualityThresholdParameter"/>.</summary>
public bool UseAdaptiveQualityThreshold {
  get {
    return UseAdaptiveQualityThresholdParameter.Value.Value;
  }
  set {
    UseAdaptiveQualityThresholdParameter.Value.Value = value;
  }
}

/// <summary>Gets or sets the value of <see cref="RelativeParentChildQualityThresholdParameter"/>.</summary>
public double RelativeParentChildQualityThreshold {
  get {
    return RelativeParentChildQualityThresholdParameter.Value.Value;
  }
  set {
    RelativeParentChildQualityThresholdParameter.Value.Value = value;
  }
}

/// <summary>Gets or sets the value of <see cref="RelativeFitnessEvaluationIntervalSizeParameter"/>.</summary>
public double RelativeFitnessEvaluationIntervalSize {
  get {
    return RelativeFitnessEvaluationIntervalSizeParameter.Value.Value;
  }
  set {
    RelativeFitnessEvaluationIntervalSizeParameter.Value.Value = value;
  }
}

/// <summary>Gets or replaces the matrix held by <see cref="RejectedStatsParameter"/>.</summary>
public IntMatrix RejectedStats {
  get {
    return RejectedStatsParameter.Value;
  }
  set {
    RejectedStatsParameter.Value = value;
  }
}

/// <summary>Gets or replaces the matrix held by <see cref="NotRejectedStatsParameter"/> (parameter key "TotalStats").</summary>
public IntMatrix TotalStats {
  get {
    return NotRejectedStatsParameter.Value;
  }
  set {
    NotRejectedStatsParameter.Value = value;
  }
}
#endregion
108
/// <summary>
/// Always true: the qualities returned by this evaluator are squared correlations, so larger is better.
/// </summary>
public override bool Maximization {
  get {
    return true;
  }
}
112
/// <summary>
/// Creates the evaluator and registers all of its parameters with their default values.
/// </summary>
public SymbolicRegressionSingleObjectiveOsgaEvaluator() {
  // The array order is the registration order, which is the same as before.
  var ownParameters = new IParameter[] {
    new ValueLookupParameter<DoubleValue>("ComparisonFactor", "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."),
    new FixedValueParameter<PercentValue>(RelativeParentChildQualityThresholdParameterName, new PercentValue(0.9)),
    new FixedValueParameter<PercentValue>(RelativeFitnessEvaluationIntervalSizeParameterName, new PercentValue(0.1)),
    new LookupParameter<ResultCollection>(ResultCollectionParameterName),
    new ScopeTreeLookupParameter<DoubleValue>("ParentQualities") { ActualName = "Quality" },
    new ValueParameter<IntMatrix>("RejectedStats", new IntMatrix()),
    new ValueParameter<IntMatrix>("TotalStats", new IntMatrix()),
    new ValueParameter<BoolValue>(AggregateStatisticsParameterName, new BoolValue(false)),
    new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName),
    new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)),
    new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false))
  };

  foreach (var parameter in ownParameters) {
    Parameters.Add(parameter);
  }
}
126
/// <summary>
/// Storable hook that runs after deserialization and adds, with their default values,
/// the parameters that are missing from the deserialized parameter collection.
/// </summary>
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
  bool hasSelectionPressure = Parameters.ContainsKey(ActualSelectionPressureParameterName);
  if (!hasSelectionPressure) {
    Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
  }

  bool hasAdaptiveThresholdSwitch = Parameters.ContainsKey(UseAdaptiveQualityThresholdParameterName);
  if (!hasAdaptiveThresholdSwitch) {
    Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
  }

  bool hasFixedIntervalsSwitch = Parameters.ContainsKey(UseFixedEvaluationIntervalsParameterName);
  if (!hasFixedIntervalsSwitch) {
    Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
  }
}
138
/// <summary>Constructor used by the persistence framework; all work is delegated to the base class.</summary>
[StorableConstructor]
protected SymbolicRegressionSingleObjectiveOsgaEvaluator(bool deserializing)
  : base(deserializing) {
}

/// <summary>Cloning constructor; all work is delegated to the base class.</summary>
protected SymbolicRegressionSingleObjectiveOsgaEvaluator(SymbolicRegressionSingleObjectiveOsgaEvaluator original, Cloner cloner)
  : base(original, cloner) {
}

/// <summary>Creates a copy of this evaluator through the cloning constructor.</summary>
public override IDeepCloneable Clone(Cloner cloner) {
  var copy = new SymbolicRegressionSingleObjectiveOsgaEvaluator(this, cloner);
  return copy;
}
147
/// <summary>
/// Clears the base state and replaces both statistics matrices with new, empty ones.
/// </summary>
public override void ClearState() {
  base.ClearState();

  // write straight to the parameters that back the RejectedStats / TotalStats properties
  RejectedStatsParameter.Value = new IntMatrix();
  NotRejectedStatsParameter.Value = new IntMatrix();
}
153
/// <summary>
/// Evaluates the current tree and stores the result in the quality parameter.
/// A tree with parent qualities is evaluated by the early-rejecting overload;
/// a tree without them is evaluated on all generated rows.
/// </summary>
/// <returns>The operation returned by the base implementation.</returns>
public override IOperation InstrumentedApply() {
  var tree = SymbolicExpressionTreeParameter.ActualValue;
  var rowsToEvaluate = GenerateRowsToEvaluate();

  var treeInterpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
  var limits = EstimationLimitsParameter.ActualValue;
  var data = ProblemDataParameter.ActualValue;
  var scale = ApplyLinearScalingParameter.ActualValue.Value;

  // parent subscopes are not present during evaluation of the initial population
  var hasParents = ParentQualitiesParameter.ActualValue.Length > 0;

  double result = hasParents
    ? Calculate(treeInterpreter, tree, limits, data, rowsToEvaluate)
    : Calculate(treeInterpreter, tree, limits.Lower, limits.Upper, data, rowsToEvaluate, scale);

  QualityParameter.ActualValue = new DoubleValue(result);
  return base.InstrumentedApply();
}
176
/// <summary>
/// Calculates the squared Pearson correlation coefficient between the target values and the
/// values estimated by <paramref name="solution"/> on the given rows.
/// </summary>
/// <param name="interpreter">Interpreter that produces the estimated values of the tree.</param>
/// <param name="solution">Tree to evaluate.</param>
/// <param name="lowerEstimationLimit">Lower bound applied to the estimated values.</param>
/// <param name="upperEstimationLimit">Upper bound applied to the estimated values.</param>
/// <param name="problemData">Supplies the dataset and the name of the target variable.</param>
/// <param name="rows">Rows of the dataset to evaluate.</param>
/// <param name="applyLinearScaling">If true, the correlation is calculated through CalculateWithScaling; otherwise on the bounded estimates directly.</param>
/// <returns>R squared, or NaN if the calculator reports an error.</returns>
public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
  IEnumerable<double> estimates = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
  IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

  OnlineCalculatorError error;
  double correlation;
  if (!applyLinearScaling) {
    IEnumerable<double> boundedEstimates = estimates.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
    correlation = OnlinePearsonsRCalculator.Calculate(targets, boundedEstimates, out error);
  } else {
    var scalingCalculator = new OnlinePearsonsRCalculator();
    CalculateWithScaling(targets, estimates, lowerEstimationLimit, upperEstimationLimit, scalingCalculator, problemData.Dataset.Rows);
    error = scalingCalculator.ErrorState;
    correlation = scalingCalculator.R;
  }

  return error == OnlineCalculatorError.None ? correlation * correlation : double.NaN;
}
195
/// <summary>
/// Calculates the squared Pearson correlation of a child with the target and stops early
/// once it is clear that the child cannot reach the required quality.
/// </summary>
/// <remarks>
/// The reference quality is interpolated between the worst and the best parent quality using
/// the comparison factor. Two strategies exist:
/// - fixed evaluation intervals: the correlation on the rows seen so far is compared with a
///   threshold after every interval (optionally gathering prediction statistics);
/// - otherwise: the rows evaluated so far are combined with a perfect correlation on the
///   remaining rows and the child is rejected as soon as even this estimate is not better
///   than the reference quality.
/// </remarks>
/// <param name="interpreter">Interpreter that produces the estimated values of the tree.</param>
/// <param name="solution">Tree to evaluate.</param>
/// <param name="estimationLimits">Bounds applied to the estimated values.</param>
/// <param name="problemData">Supplies the dataset, the target variable and the training partition.</param>
/// <param name="rows">Rows to evaluate.</param>
/// <returns>R squared at the point where the evaluation stopped, or NaN if the calculator reports an error.</returns>
private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows) {
  var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);

  // materialize once so the parent qualities are not enumerated separately for Min and Max
  var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value).ToArray();
  var minQuality = parentQualities.Min();
  var maxQuality = parentQualities.Max();
  var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
  var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;

  var trainingPartitionSize = problemData.TrainingPartition.Size;
  // The floor is 0 for small partitions or small relative interval sizes. An interval of 0
  // would divide by zero in the fixed-interval strategy and never advance the loop of the
  // perfect-correlation strategy, hence at least one row per interval.
  var interval = Math.Max(1, (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize));

  using (var estimates = estimatedValues.GetEnumerator()) {
    if (!UseFixedEvaluationIntervals)
      return CalculateAssumingPerfectCorrelation(estimates, problemData, interval, parentQuality);

    var threshold = GetRejectionThreshold(parentQuality);
    // Read the targets for the same rows as the estimates. Enumerating the whole target column
    // from row 0 pairs the wrong values when the evaluated rows do not start at row 0.
    var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
    using (var targets = targetValues.GetEnumerator()) {
      return AggregateStatisticsParameter.Value.Value
        ? CalculateInFixedIntervalsWithStatistics(targets, estimates, problemData, interval, threshold, parentQuality)
        : CalculateInFixedIntervals(targets, estimates, trainingPartitionSize, interval, threshold);
    }
  }
}

// Returns the quality a child has to exceed in the fixed-interval strategy: derived from the
// looked-up selection pressure when the adaptive threshold is enabled and a value is available,
// otherwise a fixed fraction of the parent quality.
private double GetRejectionThreshold(double parentQuality) {
  if (UseAdaptiveQualityThreshold) {
    var actualSelectionPressure = ActualSelectionPressureParameter.ActualValue;
    if (actualSelectionPressure != null)
      return parentQuality * (1 - actualSelectionPressure.Value / 100.0);
  }
  return parentQuality * RelativeParentChildQualityThreshold;
}

// Returns R squared of the pairs added so far, or NaN if the calculator is in an error state.
private static double GetSquaredR(OnlinePearsonsRCalculator calculator) {
  var r = calculator.ErrorState == OnlineCalculatorError.None ? calculator.R : double.NaN;
  return r * r;
}

// Fixed-interval strategy without statistics: returns the quality of the first interval end at
// which the child does not exceed the threshold, or the quality over all rows.
private static double CalculateInFixedIntervals(IEnumerator<double> targets, IEnumerator<double> estimates, int trainingPartitionSize, int interval, double threshold) {
  var calculator = new OnlinePearsonsRCalculator();
  var i = 0;
  while (targets.MoveNext() && estimates.MoveNext()) {
    calculator.Add(targets.Current, estimates.Current);
    ++i;
    if (i % interval == 0 || i == trainingPartitionSize) {
      var quality = GetSquaredR(calculator);
      // written as !(a > b) so that a NaN quality also leads to rejection
      if (!(quality > threshold))
        return quality;
    }
  }
  return GetSquaredR(calculator);
}

// Fixed-interval strategy with statistics: evaluates all rows, records at which interval the
// child would have been rejected and whether it is actually worse than the parent quality, and
// returns the quality at the predicted rejection point (or the quality over all rows).
private double CalculateInFixedIntervalsWithStatistics(IEnumerator<double> targets, IEnumerator<double> estimates, IRegressionProblemData problemData, int interval, double threshold, double parentQuality) {
  var trainingPartitionSize = problemData.TrainingPartition.Size;
  var trainingEnd = problemData.TrainingPartition.End;

  var calculator = new OnlinePearsonsRCalculator();
  var qualityPerInterval = new List<double>();
  var i = 0;
  while (targets.MoveNext() && estimates.MoveNext()) {
    calculator.Add(targets.Current, estimates.Current);
    ++i;
    if (i % interval == 0 || i == trainingPartitionSize)
      qualityPerInterval.Add(GetSquaredR(calculator));
  }
  var actualQuality = GetSquaredR(calculator);

  // first interval whose quality does not exceed the threshold (NaN counts as not exceeding)
  var rejectedAt = qualityPerInterval.FindIndex(q => !(q > threshold));
  var predictedRejected = rejectedAt >= 0;
  var quality = predictedRejected ? qualityPerInterval[rejectedAt] : actualQuality;
  var actuallyRejected = !(actualQuality > parentQuality);

  if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
    RejectedStats = new IntMatrix(2, qualityPerInterval.Count);
    RejectedStats.RowNames = new[] { "Predicted", "Actual" };
    RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
    TotalStats = new IntMatrix(2, 2);
    TotalStats.RowNames = new[] { "Predicted", "Actual" };
    TotalStats.ColumnNames = new[] { "Rejected", "Not Rejected" };
  }

  // gather some statistics
  if (predictedRejected) {
    RejectedStats[0, rejectedAt]++;
    TotalStats[0, 0]++;
  } else {
    TotalStats[0, 1]++;
  }
  if (actuallyRejected) {
    TotalStats[1, 0]++;
  } else {
    TotalStats[1, 1]++;
  }
  if (predictedRejected && actuallyRejected) {
    RejectedStats[1, rejectedAt]++;
  }
  return quality;
}

// Perfect-correlation strategy: uses the actual estimates for the rows evaluated so far and
// (target, target) pairs for the remaining rows of the training partition. If even this
// estimate is not better than the parent quality the child is rejected; otherwise another
// interval is evaluated until the whole estimated series is used.
// NOTE(review): the indexing assumes that the estimates correspond to the contiguous rows of
// the training partition (estimate j belongs to row TrainingPartition.Start + j) - confirm
// against the callers that pass sampled rows.
private static double CalculateAssumingPerfectCorrelation(IEnumerator<double> estimates, IRegressionProblemData problemData, int interval, double parentQuality) {
  var targetValues = problemData.Dataset.GetReadOnlyDoubleValues(problemData.TargetVariable);
  var start = problemData.TrainingPartition.Start;
  var trainingPartitionSize = problemData.TrainingPartition.Size;

  var calculator = new OnlinePearsonsRCalculator();
  var estimated = new List<double>(); // save estimated values in a list so we don't re-evaluate
  double quality = double.NaN;
  for (int i = 0; i < trainingPartitionSize; i += interval) {
    int end = Math.Min(trainingPartitionSize, i + interval);
    for (int j = i; j < end && estimates.MoveNext(); ++j)
      estimated.Add(estimates.Current);

    calculator.Reset();
    int k = 0;
    // add (estimated, target) pairs to the calculator
    for (; k < end; ++k)
      calculator.Add(estimated[k], targetValues[k + start]);
    // add (target, target) pairs to the calculator (simulate perfect correlation on the remaining rows)
    for (; k < trainingPartitionSize; ++k) {
      var index = k + start;
      calculator.Add(targetValues[index], targetValues[index]);
    }

    quality = GetSquaredR(calculator);
    if (!(quality > parentQuality))
      break;
  }
  return quality;
}
325
/// <summary>
/// Evaluates <paramref name="tree"/> on the given rows outside of a regular operator run.
/// The interpreter, the estimation limits and the linear scaling flag are looked up through
/// <paramref name="context"/>.
/// </summary>
/// <param name="context">Execution context used to resolve the looked-up parameters.</param>
/// <param name="tree">Tree to evaluate.</param>
/// <param name="problemData">Supplies the dataset and the target variable.</param>
/// <param name="rows">Rows of the dataset to evaluate.</param>
/// <returns>R squared of the tree on the given rows, or NaN if the calculator reports an error.</returns>
public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
  SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
  EstimationLimitsParameter.ExecutionContext = context;
  ApplyLinearScalingParameter.ExecutionContext = context;

  try {
    var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
    var estimationLimits = EstimationLimitsParameter.ActualValue;
    var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

    return Calculate(interpreter, tree, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
  } finally {
    // reset the contexts even if a lookup or the evaluation throws, so that the parameters
    // do not keep a reference to a context that is no longer valid
    SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
    EstimationLimitsParameter.ExecutionContext = null;
    ApplyLinearScalingParameter.ExecutionContext = null;
  }
}
343  }
344}
Note: See TracBrowser for help on using the repository browser.