source: branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs @ 14428

Last change on this file since 14428 was 14428, checked in by bburlacu, 5 years ago

#2635: Add analyzer for counting the AdjustedEvaluatedSolutions (according to the actual number of evaluated rows). Add option to preserve compatibility with the standard evaluator. Optimize performance.

File size: 20.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
34  [Item("SymbolicRegressionSingleObjectiveOSGAEvaluator", "An evaluator which tries to predict when a child will not be able to fullfil offspring selection criteria, to save evaluation time.")]
35  [StorableClass]
36  public class SymbolicRegressionSingleObjectiveOsgaEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
37    private const string RelativeParentChildQualityThresholdParameterName = "RelativeParentChildQualityThreshold";
38    private const string RelativeFitnessEvaluationIntervalSizeParameterName = "RelativeFitnessEvaluationIntervalSize";
39    private const string ResultCollectionParameterName = "Results";
40    private const string AggregateStatisticsParameterName = "AggregateStatistics";
41    private const string ActualSelectionPressureParameterName = "SelectionPressure";
42    private const string UseAdaptiveQualityThresholdParameterName = "UseAdaptiveQualityThreshold";
43    private const string UseFixedEvaluationIntervalsParameterName = "UseFixedEvaluationIntervals";
44    private const string PreserveResultCompatibilityParameterName = "PreserveEvaluationResultCompatibility";
45
46    #region parameters
47    public IFixedValueParameter<BoolValue> PreserveResultCompatibilityParameter {
48      get { return (IFixedValueParameter<BoolValue>)Parameters[PreserveResultCompatibilityParameterName]; }
49    }
50    public IFixedValueParameter<BoolValue> UseFixedEvaluationIntervalsParameter {
51      get { return (IFixedValueParameter<BoolValue>)Parameters[UseFixedEvaluationIntervalsParameterName]; }
52    }
53    public IFixedValueParameter<BoolValue> UseAdaptiveQualityThresholdParameter {
54      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveQualityThresholdParameterName]; }
55    }
56    public ILookupParameter<DoubleValue> ActualSelectionPressureParameter {
57      get { return (ILookupParameter<DoubleValue>)Parameters[ActualSelectionPressureParameterName]; }
58    }
59    public ILookupParameter<ResultCollection> ResultCollectionParameter {
60      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
61    }
62    public IValueParameter<BoolValue> AggregateStatisticsParameter {
63      get { return (IValueParameter<BoolValue>)Parameters[AggregateStatisticsParameterName]; }
64    }
65    public IValueParameter<IntMatrix> RejectedStatsParameter {
66      get { return (IValueParameter<IntMatrix>)Parameters["RejectedStats"]; }
67    }
68    public IValueParameter<IntMatrix> NotRejectedStatsParameter {
69      get { return (IValueParameter<IntMatrix>)Parameters["TotalStats"]; }
70    }
71    public IValueLookupParameter<DoubleValue> ComparisonFactorParameter {
72      get { return (ValueLookupParameter<DoubleValue>)Parameters["ComparisonFactor"]; }
73    }
74    public IFixedValueParameter<PercentValue> RelativeParentChildQualityThresholdParameter {
75      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeParentChildQualityThresholdParameterName]; }
76    }
77    public IFixedValueParameter<PercentValue> RelativeFitnessEvaluationIntervalSizeParameter {
78      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeFitnessEvaluationIntervalSizeParameterName]; }
79    }
80    public IScopeTreeLookupParameter<DoubleValue> ParentQualitiesParameter { get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters["ParentQualities"]; } }
81    #endregion
82
83    #region parameter properties
84    public bool AggregateStatistics {
85      get { return AggregateStatisticsParameter.Value.Value; }
86      set { AggregateStatisticsParameter.Value.Value = value; }
87    }
88    public bool PreserveResultCompatibility {
89      get { return PreserveResultCompatibilityParameter.Value.Value; }
90      set { PreserveResultCompatibilityParameter.Value.Value = value; }
91    }
92    public bool UseFixedEvaluationIntervals {
93      get { return UseFixedEvaluationIntervalsParameter.Value.Value; }
94      set { UseFixedEvaluationIntervalsParameter.Value.Value = value; }
95    }
96    public bool UseAdaptiveQualityThreshold {
97      get { return UseAdaptiveQualityThresholdParameter.Value.Value; }
98      set { UseAdaptiveQualityThresholdParameter.Value.Value = value; }
99    }
100    public double RelativeParentChildQualityThreshold {
101      get { return RelativeParentChildQualityThresholdParameter.Value.Value; }
102      set { RelativeParentChildQualityThresholdParameter.Value.Value = value; }
103    }
104    public double RelativeFitnessEvaluationIntervalSize {
105      get { return RelativeFitnessEvaluationIntervalSizeParameter.Value.Value; }
106      set { RelativeFitnessEvaluationIntervalSizeParameter.Value.Value = value; }
107    }
108    public IntMatrix RejectedStats {
109      get { return RejectedStatsParameter.Value; }
110      set { RejectedStatsParameter.Value = value; }
111    }
112    public IntMatrix TotalStats {
113      get { return NotRejectedStatsParameter.Value; }
114      set { NotRejectedStatsParameter.Value = value; }
115    }
116    #endregion
117
118    public override bool Maximization {
119      get { return true; }
120    }
121
122    public double AdjustedEvaluatedSolutions { get; set; }
123
124    public SymbolicRegressionSingleObjectiveOsgaEvaluator() {
125      Parameters.Add(new ValueLookupParameter<DoubleValue>("ComparisonFactor", "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));
126      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeParentChildQualityThresholdParameterName, new PercentValue(0.9)));
127      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeFitnessEvaluationIntervalSizeParameterName, new PercentValue(0.1)));
128      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
129      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ParentQualities") { ActualName = "Quality" });
130      Parameters.Add(new ValueParameter<IntMatrix>("RejectedStats", new IntMatrix()));
131      Parameters.Add(new ValueParameter<IntMatrix>("TotalStats", new IntMatrix()));
132      Parameters.Add(new ValueParameter<BoolValue>(AggregateStatisticsParameterName, new BoolValue(false)));
133      Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
134      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
135      Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
136      Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));
137    }
138
139    [StorableHook(HookType.AfterDeserialization)]
140    private void AfterDeserialization() {
141      if (!Parameters.ContainsKey(ActualSelectionPressureParameterName))
142        Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
143
144      if (!Parameters.ContainsKey(UseAdaptiveQualityThresholdParameterName))
145        Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
146
147      if (!Parameters.ContainsKey(UseFixedEvaluationIntervalsParameterName))
148        Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
149
150      if (!Parameters.ContainsKey(PreserveResultCompatibilityParameterName))
151        Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));
152    }
153
154    [StorableConstructor]
155    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(bool deserializing) : base(deserializing) { }
156
157    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(SymbolicRegressionSingleObjectiveOsgaEvaluator original, Cloner cloner) : base(original, cloner) { }
158
159    public override IDeepCloneable Clone(Cloner cloner) {
160      return new SymbolicRegressionSingleObjectiveOsgaEvaluator(this, cloner);
161    }
162
163    public override void ClearState() {
164      base.ClearState();
165      RejectedStats = new IntMatrix();
166      TotalStats = new IntMatrix();
167      AdjustedEvaluatedSolutions = 0;
168    }
169
170    public override IOperation InstrumentedApply() {
171      var solution = SymbolicExpressionTreeParameter.ActualValue;
172      IEnumerable<int> rows = GenerateRowsToEvaluate();
173
174      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
175      var estimationLimits = EstimationLimitsParameter.ActualValue;
176      var problemData = ProblemDataParameter.ActualValue;
177      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
178
179      double quality;
180      var parentQualities = ParentQualitiesParameter.ActualValue;
181
182      // parent subscopes are not present during evaluation of the initial population
183      if (parentQualities.Length > 0) {
184        quality = Calculate(interpreter, solution, estimationLimits, problemData, rows);
185      } else {
186        quality = Calculate(interpreter, solution, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
187      }
188      QualityParameter.ActualValue = new DoubleValue(quality);
189
190      return base.InstrumentedApply();
191    }
192
193    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
194      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
195      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
196      OnlineCalculatorError errorState;
197
198      double r;
199      if (applyLinearScaling) {
200        var rCalculator = new OnlinePearsonsRCalculator();
201        CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, rCalculator, problemData.Dataset.Rows);
202        errorState = rCalculator.ErrorState;
203        r = rCalculator.R;
204      } else {
205        IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
206        r = OnlinePearsonsRCalculator.Calculate(targetValues, boundedEstimatedValues, out errorState);
207      }
208      if (errorState != OnlineCalculatorError.None) return double.NaN;
209      return r * r;
210    }
211
212    private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows) {
213      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
214      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToList();
215      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value);
216      var minQuality = double.MaxValue;
217      var maxQuality = double.MinValue;
218
219      foreach (var quality in parentQualities) {
220        if (minQuality > quality) minQuality = quality;
221        if (maxQuality < quality) maxQuality = quality;
222      }
223
224      var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
225      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;
226
227      #region fixed intervals
228      if (UseFixedEvaluationIntervals) {
229        double threshold = parentQuality * RelativeParentChildQualityThreshold;
230        if (UseAdaptiveQualityThreshold) {
231          var actualSelectionPressure = ActualSelectionPressureParameter.ActualValue;
232          if (actualSelectionPressure != null)
233            threshold = parentQuality * (1 - actualSelectionPressure.Value / 100.0);
234        }
235        var estimatedEnumerator = estimatedValues.GetEnumerator();
236        var targetEnumerator = targetValues.GetEnumerator();
237
238        var rcalc = new OnlinePearsonsRCalculator();
239        var trainingPartitionSize = problemData.TrainingPartition.Size;
240        var interval = (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize);
241
242        var calculatedRows = 0;
243        #region aggregate statistics
244        if (AggregateStatistics) {
245          var trainingEnd = problemData.TrainingPartition.End;
246          var qualityPerInterval = new List<double>();
247          while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
248            var estimated = estimatedEnumerator.Current;
249            var target = targetEnumerator.Current;
250            rcalc.Add(estimated, target);
251            ++calculatedRows;
252            if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
253              var r = rcalc.ErrorState == OnlineCalculatorError.None ? rcalc.R : 0d;
254              qualityPerInterval.Add(r * r);
255            }
256          }
257          double quality;
258          {
259            var r = rcalc.ErrorState != OnlineCalculatorError.None ? 0d : rcalc.R;
260            var actualQuality = r * r;
261            quality = actualQuality;
262            bool predictedRejected = false;
263
264            calculatedRows = 0;
265            foreach (var q in qualityPerInterval) {
266              if (double.IsNaN(q) || !(q > threshold)) {
267                predictedRejected = true;
268                quality = q;
269                break;
270              }
271              ++calculatedRows;
272            }
273
274            var actuallyRejected = !(actualQuality > parentQuality);
275
276            if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
277              RejectedStats = new IntMatrix(2, qualityPerInterval.Count);
278              RejectedStats.RowNames = new[] { "Predicted", "Actual" };
279              RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
280              TotalStats = new IntMatrix(2, 2);
281              TotalStats.RowNames = new[] { "Predicted", "Actual" };
282              TotalStats.ColumnNames = new[] { "Rejected", "Not Rejected" };
283            }
284            // gather some statistics
285            if (predictedRejected) {
286              RejectedStats[0, calculatedRows]++;
287              TotalStats[0, 0]++;
288            } else {
289              TotalStats[0, 1]++;
290            }
291            if (actuallyRejected) {
292              TotalStats[1, 0]++;
293            } else {
294              TotalStats[1, 1]++;
295            }
296            if (predictedRejected && actuallyRejected) {
297              RejectedStats[1, calculatedRows]++;
298            }
299          }
300          return quality;
301        }
302        #endregion
303        else {
304          while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
305            rcalc.Add(targetEnumerator.Current, estimatedEnumerator.Current);
306            ++calculatedRows;
307            if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
308              var q = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
309              var quality = q * q;
310              if (!(quality > threshold)) {
311                AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
312                return quality;
313              }
314            }
315          }
316          var r = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
317          var actualQuality = r * r;
318          AdjustedEvaluatedSolutions += 1d;
319          return actualQuality;
320        }
321        #endregion
322      } else {
323        var lsc = new OnlineLinearScalingParameterCalculator();
324        var rcalc = new OnlinePearsonsRCalculator();
325        var interval = (int)Math.Round(RelativeFitnessEvaluationIntervalSize * problemData.TrainingPartition.Size);
326        var quality = 0d;
327        var calculatedRows = 0;
328
329        var cache = PreserveResultCompatibility ? new List<double>(problemData.TrainingPartition.Size) : null;
330        foreach (var target in estimatedValues.Zip(targetValues, (e, t) => new { EstimatedValue = e, ActualValue = t })) {
331          if (cache != null)
332            cache.Add(target.EstimatedValue);
333
334          lsc.Add(target.EstimatedValue, target.ActualValue);
335          rcalc.Add(target.EstimatedValue, target.ActualValue);
336
337          calculatedRows++;
338
339          if (calculatedRows % interval != 0) continue;
340
341          var alpha = lsc.Alpha;
342          var beta = lsc.Beta;
343          if (lsc.ErrorState != OnlineCalculatorError.None) {
344            alpha = 0;
345            beta = 1;
346          }
347
348          var calc = (OnlinePearsonsRCalculator)rcalc.Clone();
349          foreach (var t in targetValues.Skip(calculatedRows)) {
350            var s = (t - alpha) / beta; // scaled target
351            calc.Add(s, t); // add pair (scaled, target) to the calculator
352          }
353          var r = calc.ErrorState == OnlineCalculatorError.None ? calc.R : 0d;
354          quality = r * r;
355
356          if (!(quality > parentQuality)) {
357            AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
358            return quality;
359          }
360        }
361        if (PreserveResultCompatibility) {
362          // get quality for all the rows. to ensure reproducibility of results between this evaluator
363          // and the standard one, we calculate the quality in an identical way (otherwise the returned
364          // quality could be slightly off due to rounding errors (in the range 1e-15 to 1e-16)
365          var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
366          double r;
367          OnlineCalculatorError calculatorError;
368
369          if (applyLinearScaling) {
370            var alpha = lsc.Alpha;
371            var beta = lsc.Beta;
372            if (lsc.ErrorState != OnlineCalculatorError.None) {
373              alpha = 0;
374              beta = 1;
375            }
376            var boundedEstimatedValues = cache.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
377            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
378          } else {
379            var boundedEstimatedValues = cache.LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
380            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
381          }
382          quality = calculatorError == OnlineCalculatorError.None ? r * r : 0d;
383        }
384        AdjustedEvaluatedSolutions += 1d;
385        return quality;
386      }
387    }
388
389    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
390      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
391      EstimationLimitsParameter.ExecutionContext = context;
392      ApplyLinearScalingParameter.ExecutionContext = context;
393
394      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
395      var estimationLimits = EstimationLimitsParameter.ActualValue;
396      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
397
398      double r2 = Calculate(interpreter, tree, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
399
400      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
401      EstimationLimitsParameter.ExecutionContext = null;
402      ApplyLinearScalingParameter.ExecutionContext = null;
403
404      return r2;
405    }
406  }
407}
Note: See TracBrowser for help on using the repository browser.