source: branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs @ 14608

Last change on this file since 14608 was 14608, checked in by bburlacu, 5 years ago

#2635: Simplify code.

File size: 20.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
34  [Item("SymbolicRegressionSingleObjectiveOSGAEvaluator", "An evaluator which tries to predict when a child will not be able to fullfil offspring selection criteria, to save evaluation time.")]
35  [StorableClass]
36  public class SymbolicRegressionSingleObjectiveOsgaEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
37    private const string RelativeParentChildQualityThresholdParameterName = "RelativeParentChildQualityThreshold";
38    private const string RelativeFitnessEvaluationIntervalSizeParameterName = "RelativeFitnessEvaluationIntervalSize";
39    private const string ResultCollectionParameterName = "Results";
40    private const string AggregateStatisticsParameterName = "AggregateStatistics";
41    private const string ActualSelectionPressureParameterName = "SelectionPressure";
42    private const string UseAdaptiveQualityThresholdParameterName = "UseAdaptiveQualityThreshold";
43    private const string UseFixedEvaluationIntervalsParameterName = "UseFixedEvaluationIntervals";
44    private const string PreserveResultCompatibilityParameterName = "PreserveEvaluationResultCompatibility";
45
46    #region parameters
47    public IFixedValueParameter<BoolValue> PreserveResultCompatibilityParameter {
48      get { return (IFixedValueParameter<BoolValue>)Parameters[PreserveResultCompatibilityParameterName]; }
49    }
50    public IFixedValueParameter<BoolValue> UseFixedEvaluationIntervalsParameter {
51      get { return (IFixedValueParameter<BoolValue>)Parameters[UseFixedEvaluationIntervalsParameterName]; }
52    }
53    public IFixedValueParameter<BoolValue> UseAdaptiveQualityThresholdParameter {
54      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveQualityThresholdParameterName]; }
55    }
56    public ILookupParameter<DoubleValue> ActualSelectionPressureParameter {
57      get { return (ILookupParameter<DoubleValue>)Parameters[ActualSelectionPressureParameterName]; }
58    }
59    public ILookupParameter<ResultCollection> ResultCollectionParameter {
60      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
61    }
62    public IValueParameter<BoolValue> AggregateStatisticsParameter {
63      get { return (IValueParameter<BoolValue>)Parameters[AggregateStatisticsParameterName]; }
64    }
65    public IValueLookupParameter<DoubleValue> ComparisonFactorParameter {
66      get { return (ValueLookupParameter<DoubleValue>)Parameters["ComparisonFactor"]; }
67    }
68    public IFixedValueParameter<PercentValue> RelativeParentChildQualityThresholdParameter {
69      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeParentChildQualityThresholdParameterName]; }
70    }
71    public IFixedValueParameter<PercentValue> RelativeFitnessEvaluationIntervalSizeParameter {
72      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeFitnessEvaluationIntervalSizeParameterName]; }
73    }
74    public IScopeTreeLookupParameter<DoubleValue> ParentQualitiesParameter { get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters["ParentQualities"]; } }
75    #endregion
76
77    #region parameter properties
78    public bool AggregateStatistics {
79      get { return AggregateStatisticsParameter.Value.Value; }
80      set { AggregateStatisticsParameter.Value.Value = value; }
81    }
82    public bool PreserveResultCompatibility {
83      get { return PreserveResultCompatibilityParameter.Value.Value; }
84      set { PreserveResultCompatibilityParameter.Value.Value = value; }
85    }
86    public bool UseFixedEvaluationIntervals {
87      get { return UseFixedEvaluationIntervalsParameter.Value.Value; }
88      set { UseFixedEvaluationIntervalsParameter.Value.Value = value; }
89    }
90    public bool UseAdaptiveQualityThreshold {
91      get { return UseAdaptiveQualityThresholdParameter.Value.Value; }
92      set { UseAdaptiveQualityThresholdParameter.Value.Value = value; }
93    }
94    public double RelativeParentChildQualityThreshold {
95      get { return RelativeParentChildQualityThresholdParameter.Value.Value; }
96      set { RelativeParentChildQualityThresholdParameter.Value.Value = value; }
97    }
98    public double RelativeFitnessEvaluationIntervalSize {
99      get { return RelativeFitnessEvaluationIntervalSizeParameter.Value.Value; }
100      set { RelativeFitnessEvaluationIntervalSizeParameter.Value.Value = value; }
101    }
102
103    #endregion
104
105    public override bool Maximization {
106      get { return true; }
107    }
108
109    // keep track of statistics
110    public double AdjustedEvaluatedSolutions { get; set; }
111    public IntMatrix RejectedStats { get; set; }
112    public IntMatrix TotalStats { get; set; }
113
114    public SymbolicRegressionSingleObjectiveOsgaEvaluator() {
115      Parameters.Add(new ValueLookupParameter<DoubleValue>("ComparisonFactor", "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));
116      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeParentChildQualityThresholdParameterName, new PercentValue(0.9)));
117      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeFitnessEvaluationIntervalSizeParameterName, new PercentValue(0.1)));
118      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
119      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ParentQualities") { ActualName = "Quality" });
120      Parameters.Add(new ValueParameter<BoolValue>(AggregateStatisticsParameterName, new BoolValue(false)));
121      Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
122      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
123      Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
124      Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));
125
126      RejectedStats = new IntMatrix();
127      TotalStats = new IntMatrix();
128    }
129
130    [StorableHook(HookType.AfterDeserialization)]
131    private void AfterDeserialization() {
132      if (!Parameters.ContainsKey(ActualSelectionPressureParameterName))
133        Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
134
135      if (!Parameters.ContainsKey(UseAdaptiveQualityThresholdParameterName))
136        Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
137
138      if (!Parameters.ContainsKey(UseFixedEvaluationIntervalsParameterName))
139        Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
140
141      if (!Parameters.ContainsKey(PreserveResultCompatibilityParameterName))
142        Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));
143    }
144
145    [StorableConstructor]
146    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(bool deserializing) : base(deserializing) {
147      TotalStats = new IntMatrix();
148      RejectedStats = new IntMatrix();
149    }
150
151    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(SymbolicRegressionSingleObjectiveOsgaEvaluator original,
152      Cloner cloner) : base(original, cloner) {
153      if (original.TotalStats != null)
154        TotalStats = cloner.Clone(original.TotalStats);
155
156      if (original.RejectedStats != null)
157        RejectedStats = cloner.Clone(original.RejectedStats);
158    }
159
160    public override IDeepCloneable Clone(Cloner cloner) {
161      return new SymbolicRegressionSingleObjectiveOsgaEvaluator(this, cloner);
162    }
163
164    public override void ClearState() {
165      base.ClearState();
166      RejectedStats = new IntMatrix();
167      TotalStats = new IntMatrix();
168      AdjustedEvaluatedSolutions = 0;
169    }
170
171    public override IOperation InstrumentedApply() {
172      var solution = SymbolicExpressionTreeParameter.ActualValue;
173      IEnumerable<int> rows = GenerateRowsToEvaluate();
174
175      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
176      var estimationLimits = EstimationLimitsParameter.ActualValue;
177      var problemData = ProblemDataParameter.ActualValue;
178      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
179
180      double quality;
181      var parentQualities = ParentQualitiesParameter.ActualValue;
182
183      // parent subscopes are not present during evaluation of the initial population
184      if (parentQualities.Length > 0) {
185        quality = Calculate(interpreter, solution, estimationLimits, problemData, rows);
186      } else {
187        quality = Calculate(interpreter, solution, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
188      }
189      QualityParameter.ActualValue = new DoubleValue(quality);
190
191      return base.InstrumentedApply();
192    }
193
194    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
195      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
196      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
197      OnlineCalculatorError errorState;
198
199      double r;
200      if (applyLinearScaling) {
201        var rCalculator = new OnlinePearsonsRCalculator();
202        CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, rCalculator, problemData.Dataset.Rows);
203        errorState = rCalculator.ErrorState;
204        r = rCalculator.R;
205      } else {
206        IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
207        r = OnlinePearsonsRCalculator.Calculate(targetValues, boundedEstimatedValues, out errorState);
208      }
209      if (errorState != OnlineCalculatorError.None) return double.NaN;
210      return r * r;
211    }
212
213    private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows) {
214      var lowerEstimationLimit = EstimationLimitsParameter.ActualValue.Lower;
215      var upperEstimationLimit = EstimationLimitsParameter.ActualValue.Upper;
216      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows).LimitToRange(lowerEstimationLimit, upperEstimationLimit);
217      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToList();
218      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value);
219      var minQuality = double.MaxValue;
220      var maxQuality = double.MinValue;
221
222      foreach (var quality in parentQualities) {
223        if (minQuality > quality) minQuality = quality;
224        if (maxQuality < quality) maxQuality = quality;
225      }
226
227      var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
228      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;
229
230      #region fixed intervals
231      if (UseFixedEvaluationIntervals) {
232        double threshold = parentQuality * RelativeParentChildQualityThreshold;
233
234        if (UseAdaptiveQualityThreshold) {
235          var actualSelectionPressure = ActualSelectionPressureParameter.ActualValue;
236          if (actualSelectionPressure != null)
237            threshold = parentQuality * (1 - actualSelectionPressure.Value / 100.0);
238        }
239
240        var estimatedEnumerator = estimatedValues.GetEnumerator();
241        var targetEnumerator = targetValues.GetEnumerator();
242
243        var rcalc = new OnlinePearsonsRCalculator();
244        var trainingPartitionSize = problemData.TrainingPartition.Size;
245        var interval = (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize);
246
247        var calculatedRows = 0;
248        #region aggregate statistics
249        if (AggregateStatistics) {
250          var trainingEnd = problemData.TrainingPartition.End;
251
252          double quality = -1;
253          int intervalCount = 0, rejectionInterval = 0;
254          bool predictedRejected = false;
255
256          while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
257            var estimated = estimatedEnumerator.Current;
258            var target = targetEnumerator.Current;
259            rcalc.Add(target, estimated);
260            ++calculatedRows;
261            if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
262              intervalCount++;
263              if (predictedRejected) continue;
264              var r = rcalc.ErrorState == OnlineCalculatorError.None ? rcalc.R : 0d;
265              quality = r * r;
266              if (!(quality > threshold)) {
267                rejectionInterval = intervalCount - 1;
268                predictedRejected = true;
269              }
270            }
271          }
272          var actualQuality = rcalc.ErrorState == OnlineCalculatorError.None ? rcalc.R : 0d;
273          actualQuality *= actualQuality;
274
275          var actuallyRejected = !(actualQuality > parentQuality);
276
277          if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
278            RejectedStats = new IntMatrix(2, intervalCount + 1);
279            RejectedStats.RowNames = new[] { "Predicted", "Actual" };
280            RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
281            TotalStats = new IntMatrix(2, 1);
282            TotalStats.RowNames = new[] { "Predicted", "Actual" };
283            TotalStats.ColumnNames = new[] { "Rejected" };
284          }
285
286          if (actuallyRejected) {
287            TotalStats[0, 0]++; // prediction true
288            TotalStats[1, 0]++;
289            RejectedStats[0, rejectionInterval]++;
290            RejectedStats[1, rejectionInterval]++;
291          } else {
292            if (predictedRejected) {
293              RejectedStats[0, rejectionInterval]++;
294              TotalStats[0, 0]++;
295            }
296          }
297          return quality;
298        }
299        #endregion
300        else {
301          while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
302            rcalc.Add(targetEnumerator.Current, estimatedEnumerator.Current);
303            ++calculatedRows;
304            if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
305              var q = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
306              var quality = q * q;
307              if (!(quality > threshold)) {
308                AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
309                return quality;
310              }
311            }
312          }
313          var r = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
314          var actualQuality = r * r;
315          AdjustedEvaluatedSolutions += 1d;
316          return actualQuality;
317        }
318        #endregion
319      } else {
320        var lsc = new OnlineLinearScalingParameterCalculator();
321        var rcalc = new OnlinePearsonsRCalculator();
322        var interval = (int)Math.Round(RelativeFitnessEvaluationIntervalSize * problemData.TrainingPartition.Size);
323        var quality = 0d;
324        var calculatedRows = 0;
325
326        var cache = PreserveResultCompatibility ? new List<double>(problemData.TrainingPartition.Size) : null;
327        foreach (var target in estimatedValues.Zip(targetValues, (e, t) => new { EstimatedValue = e, ActualValue = t })) {
328          if (cache != null)
329            cache.Add(target.EstimatedValue);
330
331          lsc.Add(target.EstimatedValue, target.ActualValue);
332          rcalc.Add(target.EstimatedValue, target.ActualValue);
333
334          calculatedRows++;
335
336          if (calculatedRows % interval != 0) continue;
337
338          var alpha = lsc.Alpha;
339          var beta = lsc.Beta;
340          if (lsc.ErrorState != OnlineCalculatorError.None) {
341            alpha = 0;
342            beta = 1;
343          }
344
345          var calc = (OnlinePearsonsRCalculator)rcalc.Clone();
346          foreach (var t in targetValues.Skip(calculatedRows)) {
347            var s = (t - alpha) / beta; // scaled target
348            calc.Add(s, t); // add pair (scaled, target) to the calculator
349          }
350          var r = calc.ErrorState == OnlineCalculatorError.None ? calc.R : 0d;
351          quality = r * r;
352
353          if (!(quality > parentQuality)) {
354            AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
355            return quality;
356          }
357        }
358        if (PreserveResultCompatibility) {
359          // get quality for all the rows. to ensure reproducibility of results between this evaluator
360          // and the standard one, we calculate the quality in an identical way (otherwise the returned
361          // quality could be slightly off due to rounding errors (in the range 1e-15 to 1e-16)
362          var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
363          double r;
364          OnlineCalculatorError calculatorError;
365
366          if (applyLinearScaling) {
367            var alpha = lsc.Alpha;
368            var beta = lsc.Beta;
369            if (lsc.ErrorState != OnlineCalculatorError.None) {
370              alpha = 0;
371              beta = 1;
372            }
373            var boundedEstimatedValues = cache.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
374            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
375          } else {
376            var boundedEstimatedValues = cache.LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
377            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
378          }
379          quality = calculatorError == OnlineCalculatorError.None ? r * r : 0d;
380        }
381        AdjustedEvaluatedSolutions += 1d;
382        return quality;
383      }
384    }
385
386    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
387      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
388      EstimationLimitsParameter.ExecutionContext = context;
389      ApplyLinearScalingParameter.ExecutionContext = context;
390
391      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
392      var estimationLimits = EstimationLimitsParameter.ActualValue;
393      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
394
395      double r2 = Calculate(interpreter, tree, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
396
397      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
398      EstimationLimitsParameter.ExecutionContext = null;
399      ApplyLinearScalingParameter.ExecutionContext = null;
400
401      return r2;
402    }
403  }
404}
Note: See TracBrowser for help on using the repository browser.