source: branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs @ 14302

Last change on this file since 14302 was 14302, checked in by mkommend, 5 years ago

#2635: Rewrote parts of the OSGA evaluator.

File size: 17.9 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
  /// <summary>
  /// Single-objective symbolic regression evaluator that uses the squared Pearson correlation (R²)
  /// as quality and, when parent qualities are available, may stop the evaluation of a child early
  /// once its (partial or predicted) quality falls below a value derived from its parents.
  /// </summary>
  /// <remarks>
  /// Two early-stopping strategies are implemented, selected by <see cref="UseFixedEvaluationIntervals"/>:
  /// checking the partial R² after fixed-size row intervals, or predicting the final R² every
  /// few rows. Without parent qualities the plain R² over all rows is calculated.
  /// </remarks>
  [Item("SymbolicRegressionSingleObjectiveOSGAEvaluator", "An evaluator which tries to predict when a child will not be able to fullfil offspring selection criteria, to save evaluation time.")]
  [StorableClass]
  public class SymbolicRegressionSingleObjectiveOsgaEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
    private const string RelativeParentChildQualityThresholdParameterName = "RelativeParentChildQualityThreshold";
    private const string RelativeFitnessEvaluationIntervalSizeParameterName = "RelativeFitnessEvaluationIntervalSize";
    private const string ResultCollectionParameterName = "Results";
    private const string AggregateStatisticsParameterName = "AggregateStatistics";
    private const string ActualSelectionPressureParameterName = "SelectionPressure";
    private const string UseAdaptiveQualityThresholdParameterName = "UseAdaptiveQualityThreshold";
    private const string UseFixedEvaluationIntervalsParameterName = "UseFixedEvaluationIntervals";
    private const string RejectedStatsParameterName = "RejectedStats";
    private const string TotalStatsParameterName = "TotalStats";
    private const string ComparisonFactorParameterName = "ComparisonFactor";
    private const string ParentQualitiesParameterName = "ParentQualities";

    // Number of rows between two quality predictions when fixed evaluation intervals are disabled.
    private const int PredictionIntervalSize = 5;

    #region parameters
    public IFixedValueParameter<BoolValue> UseFixedEvaluationIntervalsParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[UseFixedEvaluationIntervalsParameterName]; }
    }
    public IFixedValueParameter<BoolValue> UseAdaptiveQualityThresholdParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveQualityThresholdParameterName]; }
    }
    public ILookupParameter<DoubleValue> ActualSelectionPressureParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[ActualSelectionPressureParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultCollectionParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
    }
    public IValueParameter<BoolValue> AggregateStatisticsParameter {
      get { return (IValueParameter<BoolValue>)Parameters[AggregateStatisticsParameterName]; }
    }
    public IValueParameter<IntMatrix> RejectedStatsParameter {
      get { return (IValueParameter<IntMatrix>)Parameters[RejectedStatsParameterName]; }
    }
    // NOTE: despite its name this property exposes the "TotalStats" parameter; the name is kept for compatibility.
    public IValueParameter<IntMatrix> NotRejectedStatsParameter {
      get { return (IValueParameter<IntMatrix>)Parameters[TotalStatsParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> ComparisonFactorParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[ComparisonFactorParameterName]; }
    }
    public IFixedValueParameter<PercentValue> RelativeParentChildQualityThresholdParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeParentChildQualityThresholdParameterName]; }
    }
    public IFixedValueParameter<PercentValue> RelativeFitnessEvaluationIntervalSizeParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeFitnessEvaluationIntervalSizeParameterName]; }
    }
    public IScopeTreeLookupParameter<DoubleValue> ParentQualitiesParameter {
      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[ParentQualitiesParameterName]; }
    }
    #endregion

    #region parameter properties
    /// <summary>If true, the partial quality is checked after fixed-size row intervals; otherwise the final quality is predicted every few rows.</summary>
    public bool UseFixedEvaluationIntervals {
      get { return UseFixedEvaluationIntervalsParameter.Value.Value; }
      set { UseFixedEvaluationIntervalsParameter.Value.Value = value; }
    }
    /// <summary>If true (and a selection pressure value is available), the rejection threshold is derived from the selection pressure instead of <see cref="RelativeParentChildQualityThreshold"/>.</summary>
    public bool UseAdaptiveQualityThreshold {
      get { return UseAdaptiveQualityThresholdParameter.Value.Value; }
      set { UseAdaptiveQualityThresholdParameter.Value.Value = value; }
    }
    /// <summary>Factor applied to the parent quality to obtain the rejection threshold for fixed evaluation intervals.</summary>
    public double RelativeParentChildQualityThreshold {
      get { return RelativeParentChildQualityThresholdParameter.Value.Value; }
      set { RelativeParentChildQualityThresholdParameter.Value.Value = value; }
    }

    /// <summary>Size of one evaluation interval, relative to the size of the training partition.</summary>
    public double RelativeFitnessEvaluationIntervalSize {
      get { return RelativeFitnessEvaluationIntervalSizeParameter.Value.Value; }
      set { RelativeFitnessEvaluationIntervalSizeParameter.Value.Value = value; }
    }

    /// <summary>Per-interval counts of predicted rejections (row 0) and predicted rejections that were also actual rejections (row 1).</summary>
    public IntMatrix RejectedStats {
      get { return RejectedStatsParameter.Value; }
      set { RejectedStatsParameter.Value = value; }
    }

    /// <summary>2x2 matrix counting predicted/actual (rows) rejected/not rejected (columns) children.</summary>
    public IntMatrix TotalStats {
      get { return NotRejectedStatsParameter.Value; }
      set { NotRejectedStatsParameter.Value = value; }
    }
    #endregion

    // The quality is a squared correlation, hence larger values are better.
    public override bool Maximization {
      get { return true; }
    }

    public SymbolicRegressionSingleObjectiveOsgaEvaluator() {
      Parameters.Add(new ValueLookupParameter<DoubleValue>(ComparisonFactorParameterName, "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));
      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeParentChildQualityThresholdParameterName, new PercentValue(0.9)));
      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeFitnessEvaluationIntervalSizeParameterName, new PercentValue(0.1)));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(ParentQualitiesParameterName) { ActualName = "Quality" });
      Parameters.Add(new ValueParameter<IntMatrix>(RejectedStatsParameterName, new IntMatrix()));
      Parameters.Add(new ValueParameter<IntMatrix>(TotalStatsParameterName, new IntMatrix()));
      Parameters.Add(new ValueParameter<BoolValue>(AggregateStatisticsParameterName, new BoolValue(false)));
      Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
      Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
    }

    // Adds parameters that may be missing in instances persisted before those parameters were introduced.
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(ActualSelectionPressureParameterName))
        Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));

      if (!Parameters.ContainsKey(UseAdaptiveQualityThresholdParameterName))
        Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));

      if (!Parameters.ContainsKey(UseFixedEvaluationIntervalsParameterName))
        Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
    }

    [StorableConstructor]
    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(bool deserializing) : base(deserializing) { }

    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(SymbolicRegressionSingleObjectiveOsgaEvaluator original, Cloner cloner) : base(original, cloner) { }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicRegressionSingleObjectiveOsgaEvaluator(this, cloner);
    }

    /// <summary>Clears the base state and replaces both statistics matrices with empty ones.</summary>
    public override void ClearState() {
      base.ClearState();
      RejectedStats = new IntMatrix();
      TotalStats = new IntMatrix();
    }

    /// <summary>
    /// Evaluates the current tree and stores the result in the quality parameter. The early-stopping
    /// calculation is used when parent qualities are available; otherwise the plain R² is calculated.
    /// </summary>
    public override IOperation InstrumentedApply() {
      var solution = SymbolicExpressionTreeParameter.ActualValue;
      IEnumerable<int> rows = GenerateRowsToEvaluate();

      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var estimationLimits = EstimationLimitsParameter.ActualValue;
      var problemData = ProblemDataParameter.ActualValue;
      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

      double quality;
      var parentQualities = ParentQualitiesParameter.ActualValue;

      // parent subscopes are not present during evaluation of the initial population
      if (parentQualities.Length > 0) {
        quality = Calculate(interpreter, solution, estimationLimits, problemData, rows);
      } else {
        quality = Calculate(interpreter, solution, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
      }
      QualityParameter.ActualValue = new DoubleValue(quality);

      return base.InstrumentedApply();
    }

    /// <summary>
    /// Calculates the squared Pearson correlation between the target values and the estimated values of
    /// <paramref name="solution"/> on the given <paramref name="rows"/>.
    /// </summary>
    /// <returns>R², or <see cref="double.NaN"/> if the correlation calculator reports an error.</returns>
    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
      OnlineCalculatorError errorState;

      double r;
      if (applyLinearScaling) {
        var rCalculator = new OnlinePearsonsRCalculator();
        CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, rCalculator, problemData.Dataset.Rows);
        errorState = rCalculator.ErrorState;
        r = rCalculator.R;
      } else {
        IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
        r = OnlinePearsonsRCalculator.Calculate(targetValues, boundedEstimatedValues, out errorState);
      }
      if (errorState != OnlineCalculatorError.None) return double.NaN;
      return r * r;
    }

    /// <summary>
    /// Calculates the quality of a child with early stopping. The reference parent quality is the linear
    /// interpolation between the worst and the best parent quality using the comparison factor.
    /// </summary>
    private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows) {
      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToList();

      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value).ToArray();
      var minQuality = parentQualities.Min();
      var maxQuality = parentQualities.Max();
      var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;

      if (UseFixedEvaluationIntervals)
        return CalculateWithFixedIntervals(estimatedValues, targetValues, problemData, parentQuality);
      return CalculateWithPrediction(interpreter, solution, estimationLimits, problemData, rows, estimatedValues, targetValues, parentQuality);
    }

    // Returns the squared correlation of the calculator, or NaN if the calculator is in an error state.
    private static double GetSquaredR(OnlinePearsonsRCalculator calculator) {
      var r = calculator.ErrorState != OnlineCalculatorError.None ? double.NaN : calculator.R;
      return r * r;
    }

    /// <summary>
    /// Adds rows to an online R calculator and compares the partial R² with a threshold after each interval.
    /// Without statistics aggregation the first partial quality that is not above the threshold is returned
    /// immediately; with aggregation all rows are evaluated and rejection statistics are recorded.
    /// </summary>
    private double CalculateWithFixedIntervals(IEnumerable<double> estimatedValues, List<double> targetValues, IRegressionProblemData problemData, double parentQuality) {
      double threshold = parentQuality * RelativeParentChildQualityThreshold;
      if (UseAdaptiveQualityThreshold) {
        var actualSelectionPressure = ActualSelectionPressureParameter.ActualValue;
        if (actualSelectionPressure != null)
          threshold = parentQuality * (1 - actualSelectionPressure.Value / 100.0);
      }

      var trainingPartitionSize = problemData.TrainingPartition.Size;
      // The floored interval becomes 0 for small partitions or small relative sizes, which would make
      // the modulo operations below throw a DivideByZeroException; use at least one row per interval.
      var interval = Math.Max(1, (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize));

      if (AggregateStatisticsParameter.Value.Value)
        return CalculateAndAggregateStatistics(estimatedValues, targetValues, problemData, parentQuality, threshold, interval);

      var pearsonRCalculator = new OnlinePearsonsRCalculator();
      var i = 0;
      using (var e = estimatedValues.GetEnumerator()) {
        foreach (var target in targetValues) {
          if (!e.MoveNext()) break;
          pearsonRCalculator.Add(target, e.Current);
          ++i;
          if (i % interval == 0 || i == trainingPartitionSize) {
            var quality = GetSquaredR(pearsonRCalculator);
            // the negated comparison also stops the evaluation when the partial quality is NaN
            if (!(quality > threshold))
              return quality;
          }
        }
      }
      return GetSquaredR(pearsonRCalculator);
    }

    /// <summary>
    /// Evaluates all rows, records the partial R² at the end of each interval and updates
    /// <see cref="RejectedStats"/> and <see cref="TotalStats"/> with the predicted and the actual rejection.
    /// </summary>
    /// <returns>The partial quality of the first interval that is NaN or not above the threshold, or the quality over all rows if there is none.</returns>
    private double CalculateAndAggregateStatistics(IEnumerable<double> estimatedValues, List<double> targetValues, IRegressionProblemData problemData, double parentQuality, double threshold, int interval) {
      var trainingPartitionSize = problemData.TrainingPartition.Size;
      var trainingEnd = problemData.TrainingPartition.End;
      var pearsonRCalculator = new OnlinePearsonsRCalculator();
      var qualityPerInterval = new List<double>();

      var i = 0;
      using (var e = estimatedValues.GetEnumerator()) {
        foreach (var target in targetValues) {
          if (!e.MoveNext()) break;
          pearsonRCalculator.Add(target, e.Current);
          ++i;
          if (i % interval == 0 || i == trainingPartitionSize)
            qualityPerInterval.Add(GetSquaredR(pearsonRCalculator));
        }
      }
      var actualQuality = GetSquaredR(pearsonRCalculator);

      // find the first interval at which the child would have been rejected
      bool predictedRejected = false;
      double quality = actualQuality;
      i = 0;
      foreach (var q in qualityPerInterval) {
        if (double.IsNaN(q) || !(q > threshold)) {
          predictedRejected = true;
          quality = q;
          break;
        }
        ++i;
      }

      var actuallyRejected = !(actualQuality > parentQuality);

      // lazily create the statistics matrices on first use (or after ClearState)
      if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
        RejectedStats = new IntMatrix(2, qualityPerInterval.Count);
        RejectedStats.RowNames = new[] { "Predicted", "Actual" };
        RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
        TotalStats = new IntMatrix(2, 2);
        TotalStats.RowNames = new[] { "Predicted", "Actual" };
        TotalStats.ColumnNames = new[] { "Rejected", "Not Rejected" };
      }

      // gather some statistics
      if (predictedRejected) {
        RejectedStats[0, i]++;
        TotalStats[0, 0]++;
      } else {
        TotalStats[0, 1]++;
      }
      if (actuallyRejected) {
        TotalStats[1, 0]++;
      } else {
        TotalStats[1, 1]++;
      }
      if (predictedRejected && actuallyRejected) {
        RejectedStats[1, i]++;
      }
      return quality;
    }

    /// <summary>
    /// Evaluates the rows one by one and predicts the final R² after every <see cref="PredictionIntervalSize"/>
    /// rows: a copy of the R calculator is completed with the remaining target values, each paired with
    /// (t - alpha) / beta using the current linear scaling parameters. A predicted quality below the parent
    /// quality is returned immediately.
    /// </summary>
    /// <returns>The first predicted quality below <paramref name="parentQuality"/>, or the R² over all rows (with linear scaling) if no prediction is below it.</returns>
    private double CalculateWithPrediction(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows, IEnumerable<double> estimatedValues, List<double> targetValues, double parentQuality) {
      var lsc = new OnlineLinearScalingParameterCalculator();
      var rcalc = new OnlinePearsonsRCalculator();

      var values = estimatedValues.Zip(targetValues, (es, t) => new { Estimated = es, Target = t });
      int calculatedRows = 0;

      foreach (var value in values) {
        lsc.Add(value.Estimated, value.Target);
        rcalc.Add(value.Estimated, value.Target);
        calculatedRows++;

        if (calculatedRows % PredictionIntervalSize == 0) {
          var alpha = lsc.Alpha;
          var beta = lsc.Beta;

          // work on a copy so that the running calculator only ever contains really evaluated rows
          OnlinePearsonsRCalculator calc = (OnlinePearsonsRCalculator)rcalc.Clone();
          foreach (var t in targetValues.Skip(calculatedRows)) {
            var scaledTarget = (t - alpha) / beta;
            calc.Add(scaledTarget, t);
          }

          var predictedQuality = GetSquaredR(calc);
          if (predictedQuality < parentQuality) return predictedQuality;
        }
      }

      // The quality over all rows is only needed when the child has not been rejected early, so it is
      // calculated here instead of up front. The dedicated R² evaluator is used rather than the
      // incrementally updated calculator to avoid small differences caused by rounding errors.
      return SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, solution, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, true);
    }

    /// <summary>
    /// Calculates the plain R² of <paramref name="tree"/> outside of an operator execution by temporarily
    /// assigning <paramref name="context"/> to the required lookup parameters.
    /// </summary>
    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
      EstimationLimitsParameter.ExecutionContext = context;
      ApplyLinearScalingParameter.ExecutionContext = context;

      try {
        var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
        var estimationLimits = EstimationLimitsParameter.ActualValue;
        var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

        return Calculate(interpreter, tree, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
      } finally {
        // always detach the context, even if the calculation throws
        SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
        EstimationLimitsParameter.ExecutionContext = null;
        ApplyLinearScalingParameter.ExecutionContext = null;
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.