source: branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs @ 14609

Last change on this file since 14609 was 14609, checked in by bburlacu, 3 years ago

#2635: Small improvement.

File size: 20.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
34  [Item("SymbolicRegressionSingleObjectiveOSGAEvaluator", "An evaluator which tries to predict when a child will not be able to fullfil offspring selection criteria, to save evaluation time.")]
35  [StorableClass]
36  public class SymbolicRegressionSingleObjectiveOsgaEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
37    private const string RelativeParentChildQualityThresholdParameterName = "RelativeParentChildQualityThreshold";
38    private const string RelativeFitnessEvaluationIntervalSizeParameterName = "RelativeFitnessEvaluationIntervalSize";
39    private const string ResultCollectionParameterName = "Results";
40    private const string AggregateStatisticsParameterName = "AggregateStatistics";
41    private const string ActualSelectionPressureParameterName = "SelectionPressure";
42    private const string UseAdaptiveQualityThresholdParameterName = "UseAdaptiveQualityThreshold";
43    private const string UseFixedEvaluationIntervalsParameterName = "UseFixedEvaluationIntervals";
44    private const string PreserveResultCompatibilityParameterName = "PreserveEvaluationResultCompatibility";
45
46    #region parameters
47    public IFixedValueParameter<BoolValue> PreserveResultCompatibilityParameter {
48      get { return (IFixedValueParameter<BoolValue>)Parameters[PreserveResultCompatibilityParameterName]; }
49    }
50    public IFixedValueParameter<BoolValue> UseFixedEvaluationIntervalsParameter {
51      get { return (IFixedValueParameter<BoolValue>)Parameters[UseFixedEvaluationIntervalsParameterName]; }
52    }
53    public IFixedValueParameter<BoolValue> UseAdaptiveQualityThresholdParameter {
54      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveQualityThresholdParameterName]; }
55    }
56    public ILookupParameter<DoubleValue> ActualSelectionPressureParameter {
57      get { return (ILookupParameter<DoubleValue>)Parameters[ActualSelectionPressureParameterName]; }
58    }
59    public ILookupParameter<ResultCollection> ResultCollectionParameter {
60      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
61    }
62    public IValueParameter<BoolValue> AggregateStatisticsParameter {
63      get { return (IValueParameter<BoolValue>)Parameters[AggregateStatisticsParameterName]; }
64    }
65    public IValueLookupParameter<DoubleValue> ComparisonFactorParameter {
66      get { return (ValueLookupParameter<DoubleValue>)Parameters["ComparisonFactor"]; }
67    }
68    public IFixedValueParameter<PercentValue> RelativeParentChildQualityThresholdParameter {
69      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeParentChildQualityThresholdParameterName]; }
70    }
71    public IFixedValueParameter<PercentValue> RelativeFitnessEvaluationIntervalSizeParameter {
72      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeFitnessEvaluationIntervalSizeParameterName]; }
73    }
74    public IScopeTreeLookupParameter<DoubleValue> ParentQualitiesParameter { get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters["ParentQualities"]; } }
75    #endregion
76
77    #region parameter properties
78    public bool AggregateStatistics {
79      get { return AggregateStatisticsParameter.Value.Value; }
80      set { AggregateStatisticsParameter.Value.Value = value; }
81    }
82    public bool PreserveResultCompatibility {
83      get { return PreserveResultCompatibilityParameter.Value.Value; }
84      set { PreserveResultCompatibilityParameter.Value.Value = value; }
85    }
86    public bool UseFixedEvaluationIntervals {
87      get { return UseFixedEvaluationIntervalsParameter.Value.Value; }
88      set { UseFixedEvaluationIntervalsParameter.Value.Value = value; }
89    }
90    public bool UseAdaptiveQualityThreshold {
91      get { return UseAdaptiveQualityThresholdParameter.Value.Value; }
92      set { UseAdaptiveQualityThresholdParameter.Value.Value = value; }
93    }
94    public double RelativeParentChildQualityThreshold {
95      get { return RelativeParentChildQualityThresholdParameter.Value.Value; }
96      set { RelativeParentChildQualityThresholdParameter.Value.Value = value; }
97    }
98    public double RelativeFitnessEvaluationIntervalSize {
99      get { return RelativeFitnessEvaluationIntervalSizeParameter.Value.Value; }
100      set { RelativeFitnessEvaluationIntervalSizeParameter.Value.Value = value; }
101    }
102
103    #endregion
104
105    public override bool Maximization {
106      get { return true; }
107    }
108
109    // keep track of statistics
110    public double AdjustedEvaluatedSolutions { get; set; }
111    public IntMatrix RejectedStats { get; set; }
112    public IntMatrix TotalStats { get; set; }
113
114    public SymbolicRegressionSingleObjectiveOsgaEvaluator() {
115      Parameters.Add(new ValueLookupParameter<DoubleValue>("ComparisonFactor", "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));
116      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeParentChildQualityThresholdParameterName, new PercentValue(0.9)));
117      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeFitnessEvaluationIntervalSizeParameterName, new PercentValue(0.1)));
118      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
119      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ParentQualities") { ActualName = "Quality" });
120      Parameters.Add(new ValueParameter<BoolValue>(AggregateStatisticsParameterName, new BoolValue(false)));
121      Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
122      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
123      Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
124      Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));
125
126      RejectedStats = new IntMatrix();
127      TotalStats = new IntMatrix();
128    }
129
130    [StorableHook(HookType.AfterDeserialization)]
131    private void AfterDeserialization() {
132      if (!Parameters.ContainsKey(ActualSelectionPressureParameterName))
133        Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
134
135      if (!Parameters.ContainsKey(UseAdaptiveQualityThresholdParameterName))
136        Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
137
138      if (!Parameters.ContainsKey(UseFixedEvaluationIntervalsParameterName))
139        Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
140
141      if (!Parameters.ContainsKey(PreserveResultCompatibilityParameterName))
142        Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));
143    }
144
145    [StorableConstructor]
146    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(bool deserializing) : base(deserializing) {
147      TotalStats = new IntMatrix();
148      RejectedStats = new IntMatrix();
149    }
150
151    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(SymbolicRegressionSingleObjectiveOsgaEvaluator original,
152      Cloner cloner) : base(original, cloner) {
153      if (original.TotalStats != null)
154        TotalStats = cloner.Clone(original.TotalStats);
155
156      if (original.RejectedStats != null)
157        RejectedStats = cloner.Clone(original.RejectedStats);
158    }
159
160    public override IDeepCloneable Clone(Cloner cloner) {
161      return new SymbolicRegressionSingleObjectiveOsgaEvaluator(this, cloner);
162    }
163
164    public override void ClearState() {
165      base.ClearState();
166      RejectedStats = new IntMatrix();
167      TotalStats = new IntMatrix();
168      AdjustedEvaluatedSolutions = 0;
169    }
170
171    public override IOperation InstrumentedApply() {
172      var solution = SymbolicExpressionTreeParameter.ActualValue;
173      IEnumerable<int> rows = GenerateRowsToEvaluate();
174
175      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
176      var estimationLimits = EstimationLimitsParameter.ActualValue;
177      var problemData = ProblemDataParameter.ActualValue;
178      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
179
180      double quality;
181      var parentQualities = ParentQualitiesParameter.ActualValue;
182
183      // parent subscopes are not present during evaluation of the initial population
184      if (parentQualities.Length > 0) {
185        quality = Calculate(interpreter, solution, estimationLimits, problemData, rows);
186      } else {
187        quality = Calculate(interpreter, solution, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
188      }
189      QualityParameter.ActualValue = new DoubleValue(quality);
190
191      return base.InstrumentedApply();
192    }
193
194    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
195      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
196      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
197      OnlineCalculatorError errorState;
198
199      double r;
200      if (applyLinearScaling) {
201        var rCalculator = new OnlinePearsonsRCalculator();
202        CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, rCalculator, problemData.Dataset.Rows);
203        errorState = rCalculator.ErrorState;
204        r = rCalculator.R;
205      } else {
206        IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
207        r = OnlinePearsonsRCalculator.Calculate(targetValues, boundedEstimatedValues, out errorState);
208      }
209      if (errorState != OnlineCalculatorError.None) return double.NaN;
210      return r * r;
211    }
212
213    private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows) {
214      var lowerEstimationLimit = EstimationLimitsParameter.ActualValue.Lower;
215      var upperEstimationLimit = EstimationLimitsParameter.ActualValue.Upper;
216      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows).LimitToRange(lowerEstimationLimit, upperEstimationLimit);
217      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToList();
218      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value);
219      var minQuality = double.MaxValue;
220      var maxQuality = double.MinValue;
221
222      foreach (var quality in parentQualities) {
223        if (minQuality > quality) minQuality = quality;
224        if (maxQuality < quality) maxQuality = quality;
225      }
226
227      var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
228      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;
229
230      #region fixed intervals
231      if (UseFixedEvaluationIntervals) {
232        double threshold = parentQuality * RelativeParentChildQualityThreshold;
233
234        if (UseAdaptiveQualityThreshold) {
235          var actualSelectionPressure = ActualSelectionPressureParameter.ActualValue;
236          if (actualSelectionPressure != null)
237            threshold = parentQuality * (1 - actualSelectionPressure.Value / 100.0);
238        }
239
240        var estimatedEnumerator = estimatedValues.GetEnumerator();
241        var targetEnumerator = targetValues.GetEnumerator();
242
243        var rcalc = new OnlinePearsonsRCalculator();
244        var trainingPartitionSize = problemData.TrainingPartition.Size;
245        var interval = (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize);
246
247        var calculatedRows = 0;
248        #region aggregate statistics
249        if (AggregateStatistics) {
250          var trainingEnd = problemData.TrainingPartition.End;
251
252          double quality = 0;
253          int intervalCount = 0, rejectionInterval = 0;
254          var predictedRejected = false;
255
256          while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
257            var estimated = estimatedEnumerator.Current;
258            var target = targetEnumerator.Current;
259            rcalc.Add(target, estimated);
260            ++calculatedRows;
261            if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
262              intervalCount++;
263              if (predictedRejected) continue;
264              var r = rcalc.ErrorState == OnlineCalculatorError.None ? rcalc.R : 0d;
265              quality = r * r;
266              if (!(quality > threshold)) {
267                rejectionInterval = intervalCount - 1;
268                predictedRejected = true;
269              }
270            }
271          }
272          var actualQuality = rcalc.ErrorState == OnlineCalculatorError.None ? rcalc.R : 0d;
273          actualQuality *= actualQuality;
274
275          if (!predictedRejected) quality = actualQuality;
276
277          var actuallyRejected = !(actualQuality > parentQuality);
278
279          if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
280            RejectedStats = new IntMatrix(2, intervalCount + 1);
281            RejectedStats.RowNames = new[] { "Predicted", "Actual" };
282            RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
283            TotalStats = new IntMatrix(2, 1);
284            TotalStats.RowNames = new[] { "Predicted", "Actual" };
285            TotalStats.ColumnNames = new[] { "Rejected" };
286          }
287
288          if (actuallyRejected) {
289            TotalStats[0, 0]++; // prediction true
290            TotalStats[1, 0]++;
291            RejectedStats[0, rejectionInterval]++;
292            RejectedStats[1, rejectionInterval]++;
293          } else {
294            if (predictedRejected) {
295              RejectedStats[0, rejectionInterval]++;
296              TotalStats[0, 0]++;
297            }
298          }
299          return quality;
300        }
301        #endregion
302        else {
303          while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
304            rcalc.Add(targetEnumerator.Current, estimatedEnumerator.Current);
305            ++calculatedRows;
306            if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
307              var q = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
308              var quality = q * q;
309              if (!(quality > threshold)) {
310                AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
311                return quality;
312              }
313            }
314          }
315          var r = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
316          var actualQuality = r * r;
317          AdjustedEvaluatedSolutions += 1d;
318          return actualQuality;
319        }
320        #endregion
321      } else {
322        var lsc = new OnlineLinearScalingParameterCalculator();
323        var rcalc = new OnlinePearsonsRCalculator();
324        var interval = (int)Math.Round(RelativeFitnessEvaluationIntervalSize * problemData.TrainingPartition.Size);
325        var quality = 0d;
326        var calculatedRows = 0;
327
328        var cache = PreserveResultCompatibility ? new List<double>(problemData.TrainingPartition.Size) : null;
329        foreach (var target in estimatedValues.Zip(targetValues, (e, t) => new { EstimatedValue = e, ActualValue = t })) {
330          if (cache != null)
331            cache.Add(target.EstimatedValue);
332
333          lsc.Add(target.EstimatedValue, target.ActualValue);
334          rcalc.Add(target.EstimatedValue, target.ActualValue);
335
336          calculatedRows++;
337
338          if (calculatedRows % interval != 0) continue;
339
340          var alpha = lsc.Alpha;
341          var beta = lsc.Beta;
342          if (lsc.ErrorState != OnlineCalculatorError.None) {
343            alpha = 0;
344            beta = 1;
345          }
346
347          var calc = (OnlinePearsonsRCalculator)rcalc.Clone();
348          foreach (var t in targetValues.Skip(calculatedRows)) {
349            var s = (t - alpha) / beta; // scaled target
350            calc.Add(s, t); // add pair (scaled, target) to the calculator
351          }
352          var r = calc.ErrorState == OnlineCalculatorError.None ? calc.R : 0d;
353          quality = r * r;
354
355          if (!(quality > parentQuality)) {
356            AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
357            return quality;
358          }
359        }
360        if (PreserveResultCompatibility) {
361          // get quality for all the rows. to ensure reproducibility of results between this evaluator
362          // and the standard one, we calculate the quality in an identical way (otherwise the returned
363          // quality could be slightly off due to rounding errors (in the range 1e-15 to 1e-16)
364          var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
365          double r;
366          OnlineCalculatorError calculatorError;
367
368          if (applyLinearScaling) {
369            var alpha = lsc.Alpha;
370            var beta = lsc.Beta;
371            if (lsc.ErrorState != OnlineCalculatorError.None) {
372              alpha = 0;
373              beta = 1;
374            }
375            var boundedEstimatedValues = cache.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
376            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
377          } else {
378            var boundedEstimatedValues = cache.LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
379            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
380          }
381          quality = calculatorError == OnlineCalculatorError.None ? r * r : 0d;
382        }
383        AdjustedEvaluatedSolutions += 1d;
384        return quality;
385      }
386    }
387
388    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
389      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
390      EstimationLimitsParameter.ExecutionContext = context;
391      ApplyLinearScalingParameter.ExecutionContext = context;
392
393      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
394      var estimationLimits = EstimationLimitsParameter.ActualValue;
395      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
396
397      double r2 = Calculate(interpreter, tree, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
398
399      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
400      EstimationLimitsParameter.ExecutionContext = null;
401      ApplyLinearScalingParameter.ExecutionContext = null;
402
403      return r2;
404    }
405  }
406}
Note: See TracBrowser for help on using the repository browser.