source: branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs @ 14585

Last change on this file since 14585 was 14585, checked in by bburlacu, 6 years ago

#2635: Limit estimated values to range by default.

File size: 20.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
34  [Item("SymbolicRegressionSingleObjectiveOSGAEvaluator", "An evaluator which tries to predict when a child will not be able to fullfil offspring selection criteria, to save evaluation time.")]
35  [StorableClass]
36  public class SymbolicRegressionSingleObjectiveOsgaEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
37    private const string RelativeParentChildQualityThresholdParameterName = "RelativeParentChildQualityThreshold";
38    private const string RelativeFitnessEvaluationIntervalSizeParameterName = "RelativeFitnessEvaluationIntervalSize";
39    private const string ResultCollectionParameterName = "Results";
40    private const string AggregateStatisticsParameterName = "AggregateStatistics";
41    private const string ActualSelectionPressureParameterName = "SelectionPressure";
42    private const string UseAdaptiveQualityThresholdParameterName = "UseAdaptiveQualityThreshold";
43    private const string UseFixedEvaluationIntervalsParameterName = "UseFixedEvaluationIntervals";
44    private const string PreserveResultCompatibilityParameterName = "PreserveEvaluationResultCompatibility";
45
46    #region parameters
47    public IFixedValueParameter<BoolValue> PreserveResultCompatibilityParameter {
48      get { return (IFixedValueParameter<BoolValue>)Parameters[PreserveResultCompatibilityParameterName]; }
49    }
50    public IFixedValueParameter<BoolValue> UseFixedEvaluationIntervalsParameter {
51      get { return (IFixedValueParameter<BoolValue>)Parameters[UseFixedEvaluationIntervalsParameterName]; }
52    }
53    public IFixedValueParameter<BoolValue> UseAdaptiveQualityThresholdParameter {
54      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveQualityThresholdParameterName]; }
55    }
56    public ILookupParameter<DoubleValue> ActualSelectionPressureParameter {
57      get { return (ILookupParameter<DoubleValue>)Parameters[ActualSelectionPressureParameterName]; }
58    }
59    public ILookupParameter<ResultCollection> ResultCollectionParameter {
60      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
61    }
62    public IValueParameter<BoolValue> AggregateStatisticsParameter {
63      get { return (IValueParameter<BoolValue>)Parameters[AggregateStatisticsParameterName]; }
64    }
65    public IValueLookupParameter<DoubleValue> ComparisonFactorParameter {
66      get { return (ValueLookupParameter<DoubleValue>)Parameters["ComparisonFactor"]; }
67    }
68    public IFixedValueParameter<PercentValue> RelativeParentChildQualityThresholdParameter {
69      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeParentChildQualityThresholdParameterName]; }
70    }
71    public IFixedValueParameter<PercentValue> RelativeFitnessEvaluationIntervalSizeParameter {
72      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeFitnessEvaluationIntervalSizeParameterName]; }
73    }
74    public IScopeTreeLookupParameter<DoubleValue> ParentQualitiesParameter { get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters["ParentQualities"]; } }
75    #endregion
76
77    #region parameter properties
78    public bool AggregateStatistics {
79      get { return AggregateStatisticsParameter.Value.Value; }
80      set { AggregateStatisticsParameter.Value.Value = value; }
81    }
82    public bool PreserveResultCompatibility {
83      get { return PreserveResultCompatibilityParameter.Value.Value; }
84      set { PreserveResultCompatibilityParameter.Value.Value = value; }
85    }
86    public bool UseFixedEvaluationIntervals {
87      get { return UseFixedEvaluationIntervalsParameter.Value.Value; }
88      set { UseFixedEvaluationIntervalsParameter.Value.Value = value; }
89    }
90    public bool UseAdaptiveQualityThreshold {
91      get { return UseAdaptiveQualityThresholdParameter.Value.Value; }
92      set { UseAdaptiveQualityThresholdParameter.Value.Value = value; }
93    }
94    public double RelativeParentChildQualityThreshold {
95      get { return RelativeParentChildQualityThresholdParameter.Value.Value; }
96      set { RelativeParentChildQualityThresholdParameter.Value.Value = value; }
97    }
98    public double RelativeFitnessEvaluationIntervalSize {
99      get { return RelativeFitnessEvaluationIntervalSizeParameter.Value.Value; }
100      set { RelativeFitnessEvaluationIntervalSizeParameter.Value.Value = value; }
101    }
102
103    #endregion
104
105    public override bool Maximization {
106      get { return true; }
107    }
108
109    // keep track of statistics
110    public double AdjustedEvaluatedSolutions { get; set; }
111    public IntMatrix RejectedStats { get; set; }
112    public IntMatrix TotalStats { get; set; }
113
114    public SymbolicRegressionSingleObjectiveOsgaEvaluator() {
115      Parameters.Add(new ValueLookupParameter<DoubleValue>("ComparisonFactor", "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));
116      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeParentChildQualityThresholdParameterName, new PercentValue(0.9)));
117      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeFitnessEvaluationIntervalSizeParameterName, new PercentValue(0.1)));
118      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
119      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ParentQualities") { ActualName = "Quality" });
120      Parameters.Add(new ValueParameter<BoolValue>(AggregateStatisticsParameterName, new BoolValue(false)));
121      Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
122      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
123      Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
124      Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));
125
126      RejectedStats = new IntMatrix();
127      TotalStats = new IntMatrix();
128    }
129
130    [StorableHook(HookType.AfterDeserialization)]
131    private void AfterDeserialization() {
132      if (!Parameters.ContainsKey(ActualSelectionPressureParameterName))
133        Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
134
135      if (!Parameters.ContainsKey(UseAdaptiveQualityThresholdParameterName))
136        Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
137
138      if (!Parameters.ContainsKey(UseFixedEvaluationIntervalsParameterName))
139        Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
140
141      if (!Parameters.ContainsKey(PreserveResultCompatibilityParameterName))
142        Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));
143    }
144
145    [StorableConstructor]
146    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(bool deserializing) : base(deserializing) {
147      TotalStats = new IntMatrix();
148      RejectedStats = new IntMatrix();
149    }
150
151    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(SymbolicRegressionSingleObjectiveOsgaEvaluator original,
152      Cloner cloner) : base(original, cloner) {
153      if (original.TotalStats != null)
154        TotalStats = cloner.Clone(original.TotalStats);
155
156      if (original.RejectedStats != null)
157        RejectedStats = cloner.Clone(original.RejectedStats);
158    }
159
160    public override IDeepCloneable Clone(Cloner cloner) {
161      return new SymbolicRegressionSingleObjectiveOsgaEvaluator(this, cloner);
162    }
163
164    public override void ClearState() {
165      base.ClearState();
166      RejectedStats = new IntMatrix();
167      TotalStats = new IntMatrix();
168      AdjustedEvaluatedSolutions = 0;
169    }
170
171    public override IOperation InstrumentedApply() {
172      var solution = SymbolicExpressionTreeParameter.ActualValue;
173      IEnumerable<int> rows = GenerateRowsToEvaluate();
174
175      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
176      var estimationLimits = EstimationLimitsParameter.ActualValue;
177      var problemData = ProblemDataParameter.ActualValue;
178      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
179
180      double quality;
181      var parentQualities = ParentQualitiesParameter.ActualValue;
182
183      // parent subscopes are not present during evaluation of the initial population
184      if (parentQualities.Length > 0) {
185        quality = Calculate(interpreter, solution, estimationLimits, problemData, rows);
186      } else {
187        quality = Calculate(interpreter, solution, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
188      }
189      QualityParameter.ActualValue = new DoubleValue(quality);
190
191      return base.InstrumentedApply();
192    }
193
194    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
195      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
196      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
197      OnlineCalculatorError errorState;
198
199      double r;
200      if (applyLinearScaling) {
201        var rCalculator = new OnlinePearsonsRCalculator();
202        CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, rCalculator, problemData.Dataset.Rows);
203        errorState = rCalculator.ErrorState;
204        r = rCalculator.R;
205      } else {
206        IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
207        r = OnlinePearsonsRCalculator.Calculate(targetValues, boundedEstimatedValues, out errorState);
208      }
209      if (errorState != OnlineCalculatorError.None) return double.NaN;
210      return r * r;
211    }
212
213    private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows) {
214      var lowerEstimationLimit = EstimationLimitsParameter.ActualValue.Lower;
215      var upperEstimationLimit = EstimationLimitsParameter.ActualValue.Upper;
216      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows).LimitToRange(lowerEstimationLimit, upperEstimationLimit);
217      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToList();
218      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value);
219      var minQuality = double.MaxValue;
220      var maxQuality = double.MinValue;
221
222      foreach (var quality in parentQualities) {
223        if (minQuality > quality) minQuality = quality;
224        if (maxQuality < quality) maxQuality = quality;
225      }
226
227      var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
228      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;
229
230      #region fixed intervals
231      if (UseFixedEvaluationIntervals) {
232        double threshold = parentQuality * RelativeParentChildQualityThreshold;
233        if (UseAdaptiveQualityThreshold) {
234          var actualSelectionPressure = ActualSelectionPressureParameter.ActualValue;
235          if (actualSelectionPressure != null)
236            threshold = parentQuality * (1 - actualSelectionPressure.Value / 100.0);
237        }
238
239        var estimatedEnumerator = estimatedValues.GetEnumerator();
240        var targetEnumerator = targetValues.GetEnumerator();
241
242        var rcalc = new OnlinePearsonsRCalculator();
243        var trainingPartitionSize = problemData.TrainingPartition.Size;
244        var interval = (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize);
245
246        var calculatedRows = 0;
247        #region aggregate statistics
248        if (AggregateStatistics) {
249          var trainingEnd = problemData.TrainingPartition.End;
250          var qualityPerInterval = new List<double>();
251          while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
252            var estimated = estimatedEnumerator.Current;
253            var target = targetEnumerator.Current;
254            rcalc.Add(estimated, target);
255            ++calculatedRows;
256            if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
257              var r = rcalc.ErrorState == OnlineCalculatorError.None ? rcalc.R : 0d;
258              qualityPerInterval.Add(r * r);
259            }
260          }
261          double quality;
262          {
263            var r = rcalc.ErrorState != OnlineCalculatorError.None ? 0d : rcalc.R;
264            var actualQuality = r * r;
265            quality = actualQuality;
266            bool predictedRejected = false;
267
268            int i;
269
270            for (i = 0; i < qualityPerInterval.Count; ++i) {
271              var q = qualityPerInterval[i];
272              if (double.IsNaN(q) || !(q > threshold)) {
273                predictedRejected = true;
274                quality = q;
275                break;
276              }
277            }
278
279            var actuallyRejected = !(actualQuality > parentQuality);
280
281            if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
282              RejectedStats = new IntMatrix(2, qualityPerInterval.Count + 1);
283              RejectedStats.RowNames = new[] { "Predicted", "Actual" };
284              RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
285              TotalStats = new IntMatrix(2, 1);
286              TotalStats.RowNames = new[] { "Predicted", "Actual" };
287              TotalStats.ColumnNames = new[] { "Rejected" };
288            }
289
290            if (actuallyRejected) {
291              TotalStats[0, 0]++; // prediction true
292              TotalStats[1, 0]++;
293              RejectedStats[0, i]++;
294              RejectedStats[1, i]++;
295            } else {
296              if (predictedRejected) {
297                RejectedStats[0, i]++;
298                TotalStats[0, 0]++;
299              }
300            }
301          }
302          return quality;
303        }
304        #endregion
305        else {
306          while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
307            rcalc.Add(targetEnumerator.Current, estimatedEnumerator.Current);
308            ++calculatedRows;
309            if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
310              var q = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
311              var quality = q * q;
312              if (!(quality > threshold)) {
313                AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
314                return quality;
315              }
316            }
317          }
318          var r = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
319          var actualQuality = r * r;
320          AdjustedEvaluatedSolutions += 1d;
321          return actualQuality;
322        }
323        #endregion
324      } else {
325        var lsc = new OnlineLinearScalingParameterCalculator();
326        var rcalc = new OnlinePearsonsRCalculator();
327        var interval = (int)Math.Round(RelativeFitnessEvaluationIntervalSize * problemData.TrainingPartition.Size);
328        var quality = 0d;
329        var calculatedRows = 0;
330
331        var cache = PreserveResultCompatibility ? new List<double>(problemData.TrainingPartition.Size) : null;
332        foreach (var target in estimatedValues.Zip(targetValues, (e, t) => new { EstimatedValue = e, ActualValue = t })) {
333          if (cache != null)
334            cache.Add(target.EstimatedValue);
335
336          lsc.Add(target.EstimatedValue, target.ActualValue);
337          rcalc.Add(target.EstimatedValue, target.ActualValue);
338
339          calculatedRows++;
340
341          if (calculatedRows % interval != 0) continue;
342
343          var alpha = lsc.Alpha;
344          var beta = lsc.Beta;
345          if (lsc.ErrorState != OnlineCalculatorError.None) {
346            alpha = 0;
347            beta = 1;
348          }
349
350          var calc = (OnlinePearsonsRCalculator)rcalc.Clone();
351          foreach (var t in targetValues.Skip(calculatedRows)) {
352            var s = (t - alpha) / beta; // scaled target
353            calc.Add(s, t); // add pair (scaled, target) to the calculator
354          }
355          var r = calc.ErrorState == OnlineCalculatorError.None ? calc.R : 0d;
356          quality = r * r;
357
358          if (!(quality > parentQuality)) {
359            AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
360            return quality;
361          }
362        }
363        if (PreserveResultCompatibility) {
364          // get quality for all the rows. to ensure reproducibility of results between this evaluator
365          // and the standard one, we calculate the quality in an identical way (otherwise the returned
366          // quality could be slightly off due to rounding errors (in the range 1e-15 to 1e-16)
367          var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
368          double r;
369          OnlineCalculatorError calculatorError;
370
371          if (applyLinearScaling) {
372            var alpha = lsc.Alpha;
373            var beta = lsc.Beta;
374            if (lsc.ErrorState != OnlineCalculatorError.None) {
375              alpha = 0;
376              beta = 1;
377            }
378            var boundedEstimatedValues = cache.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
379            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
380          } else {
381            var boundedEstimatedValues = cache.LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
382            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
383          }
384          quality = calculatorError == OnlineCalculatorError.None ? r * r : 0d;
385        }
386        AdjustedEvaluatedSolutions += 1d;
387        return quality;
388      }
389    }
390
391    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
392      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
393      EstimationLimitsParameter.ExecutionContext = context;
394      ApplyLinearScalingParameter.ExecutionContext = context;
395
396      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
397      var estimationLimits = EstimationLimitsParameter.ActualValue;
398      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
399
400      double r2 = Calculate(interpreter, tree, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
401
402      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
403      EstimationLimitsParameter.ExecutionContext = null;
404      ApplyLinearScalingParameter.ExecutionContext = null;
405
406      return r2;
407    }
408  }
409}
Note: See TracBrowser for help on using the repository browser.