source: branches/2635_HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs @ 16818

Last change on this file since 16818 was 16818, checked in by bburlacu, 5 months ago

#2635: Update solution to .net framework version 4.6.1, switch to new persistence, add plugin frame file and prebuild event.

File size: 20.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using HEAL.Attic;
23using HeuristicLab.Common;
24using HeuristicLab.Core;
25using HeuristicLab.Data;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using System;
30using System.Collections.Generic;
31using System.Linq;
32
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
  /// <summary>
  /// Single-objective symbolic regression evaluator (quality = squared Pearson's R) that, for
  /// individuals with parent qualities available, evaluates the training rows incrementally and
  /// stops as soon as an intermediate quality does not exceed a parent-derived threshold.
  /// </summary>
  /// <remarks>
  /// The fraction of rows actually evaluated is accumulated in <see cref="AdjustedEvaluatedSolutions"/>.
  /// When <see cref="AggregateStatistics"/> is enabled, all rows are always evaluated and the
  /// predicted/actual rejections are counted in <see cref="RejectedStats"/> and <see cref="TotalStats"/>.
  /// </remarks>
  [Item("SymbolicRegressionSingleObjectiveOSGAEvaluator", "An evaluator which tries to predict when a child will not be able to fullfil offspring selection criteria, to save evaluation time.")]
  [StorableType("559C6852-9A4F-4C13-9AA5-3D2A44834AC3")]
  public class SymbolicRegressionSingleObjectiveOsgaEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
    // Names under which the parameters are registered in the Parameters collection.
    private const string RelativeParentChildQualityThresholdParameterName = "RelativeParentChildQualityThreshold";
    private const string RelativeFitnessEvaluationIntervalSizeParameterName = "RelativeFitnessEvaluationIntervalSize";
    private const string ResultCollectionParameterName = "Results";
    private const string AggregateStatisticsParameterName = "AggregateStatistics";
    private const string ActualSelectionPressureParameterName = "SelectionPressure";
    private const string UseAdaptiveQualityThresholdParameterName = "UseAdaptiveQualityThreshold";
    private const string UseFixedEvaluationIntervalsParameterName = "UseFixedEvaluationIntervals";
    private const string PreserveResultCompatibilityParameterName = "PreserveEvaluationResultCompatibility";
    private const string ComparisonFactorParameterName = "ComparisonFactor";
    private const string ParentQualitiesParameterName = "ParentQualities";

    #region parameters
    /// <summary>Flag parameter: recompute the final quality the same way the static <c>Calculate</c> does.</summary>
    public IFixedValueParameter<BoolValue> PreserveResultCompatibilityParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[PreserveResultCompatibilityParameterName]; }
    }
    /// <summary>Flag parameter: use the fixed-interval early-stopping strategy.</summary>
    public IFixedValueParameter<BoolValue> UseFixedEvaluationIntervalsParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[UseFixedEvaluationIntervalsParameterName]; }
    }
    /// <summary>Flag parameter: derive the rejection threshold from the current selection pressure.</summary>
    public IFixedValueParameter<BoolValue> UseAdaptiveQualityThresholdParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveQualityThresholdParameterName]; }
    }
    /// <summary>Lookup of the "SelectionPressure" value; may resolve to null.</summary>
    public ILookupParameter<DoubleValue> ActualSelectionPressureParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[ActualSelectionPressureParameterName]; }
    }
    /// <summary>Lookup of the "Results" collection.</summary>
    public ILookupParameter<ResultCollection> ResultCollectionParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
    }
    /// <summary>Flag parameter: collect rejection statistics instead of stopping early.</summary>
    public IValueParameter<BoolValue> AggregateStatisticsParameter {
      get { return (IValueParameter<BoolValue>)Parameters[AggregateStatisticsParameterName]; }
    }
    /// <summary>Interpolation factor between the worst (0.0) and the best (1.0) parent quality.</summary>
    public IValueLookupParameter<DoubleValue> ComparisonFactorParameter {
      // Cast to the interface (like every other accessor) instead of the concrete parameter class.
      get { return (IValueLookupParameter<DoubleValue>)Parameters[ComparisonFactorParameterName]; }
    }
    /// <summary>Factor applied to the parent quality to obtain the fixed-interval rejection threshold.</summary>
    public IFixedValueParameter<PercentValue> RelativeParentChildQualityThresholdParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeParentChildQualityThresholdParameterName]; }
    }
    /// <summary>Size of one evaluation interval relative to the training partition size.</summary>
    public IFixedValueParameter<PercentValue> RelativeFitnessEvaluationIntervalSizeParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeFitnessEvaluationIntervalSizeParameterName]; }
    }
    /// <summary>Qualities looked up from the scope tree under the actual name "Quality".</summary>
    public IScopeTreeLookupParameter<DoubleValue> ParentQualitiesParameter {
      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[ParentQualitiesParameterName]; }
    }
    #endregion

    #region parameter properties
    public bool AggregateStatistics {
      get { return AggregateStatisticsParameter.Value.Value; }
      set { AggregateStatisticsParameter.Value.Value = value; }
    }
    public bool PreserveResultCompatibility {
      get { return PreserveResultCompatibilityParameter.Value.Value; }
      set { PreserveResultCompatibilityParameter.Value.Value = value; }
    }
    public bool UseFixedEvaluationIntervals {
      get { return UseFixedEvaluationIntervalsParameter.Value.Value; }
      set { UseFixedEvaluationIntervalsParameter.Value.Value = value; }
    }
    public bool UseAdaptiveQualityThreshold {
      get { return UseAdaptiveQualityThresholdParameter.Value.Value; }
      set { UseAdaptiveQualityThresholdParameter.Value.Value = value; }
    }
    public double RelativeParentChildQualityThreshold {
      get { return RelativeParentChildQualityThresholdParameter.Value.Value; }
      set { RelativeParentChildQualityThresholdParameter.Value.Value = value; }
    }
    public double RelativeFitnessEvaluationIntervalSize {
      get { return RelativeFitnessEvaluationIntervalSizeParameter.Value.Value; }
      set { RelativeFitnessEvaluationIntervalSizeParameter.Value.Value = value; }
    }
    #endregion

    /// <summary>The quality (squared Pearson's R) is maximized.</summary>
    public override bool Maximization {
      get { return true; }
    }

    // keep track of statistics
    /// <summary>
    /// Sum over all early-stopping evaluations of the fraction of training rows that were processed
    /// (1.0 for a full evaluation). Not updated while <see cref="AggregateStatistics"/> is enabled.
    /// </summary>
    [Storable]
    public double AdjustedEvaluatedSolutions { get; set; }
    /// <summary>Per-interval rejection counters; rows "Predicted" and "Actual".</summary>
    [Storable]
    public IntMatrix RejectedStats { get; set; }
    /// <summary>Overall rejection counters; rows "Predicted" and "Actual", single column "Rejected".</summary>
    [Storable]
    public IntMatrix TotalStats { get; set; }

    /// <summary>Creates the evaluator and registers all parameters with their default values.</summary>
    public SymbolicRegressionSingleObjectiveOsgaEvaluator() {
      Parameters.Add(new ValueLookupParameter<DoubleValue>(ComparisonFactorParameterName, "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));
      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeParentChildQualityThresholdParameterName, new PercentValue(0.9)));
      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeFitnessEvaluationIntervalSizeParameterName, new PercentValue(0.1)));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(ParentQualitiesParameterName) { ActualName = "Quality" });
      Parameters.Add(new ValueParameter<BoolValue>(AggregateStatisticsParameterName, new BoolValue(false)));
      Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
      Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
      Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));

      RejectedStats = new IntMatrix();
      TotalStats = new IntMatrix();
    }

    /// <summary>
    /// Adds parameters that are missing from the deserialized parameter collection so that the
    /// accessors above do not fail on them.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(ActualSelectionPressureParameterName))
        Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));

      if (!Parameters.ContainsKey(UseAdaptiveQualityThresholdParameterName))
        Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));

      if (!Parameters.ContainsKey(UseFixedEvaluationIntervalsParameterName))
        Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));

      if (!Parameters.ContainsKey(PreserveResultCompatibilityParameterName))
        Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));
    }

    /// <summary>Storable constructor; starts with empty statistics matrices.</summary>
    [StorableConstructor]
    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(StorableConstructorFlag _) : base(_) {
      TotalStats = new IntMatrix();
      RejectedStats = new IntMatrix();
    }

    /// <summary>Cloning constructor; copies the collected statistics from <paramref name="original"/>.</summary>
    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(SymbolicRegressionSingleObjectiveOsgaEvaluator original,
      Cloner cloner) : base(original, cloner) {
      // The counter is storable state just like the two matrices, so a clone must carry it as well.
      AdjustedEvaluatedSolutions = original.AdjustedEvaluatedSolutions;

      if (original.TotalStats != null)
        TotalStats = cloner.Clone(original.TotalStats);

      if (original.RejectedStats != null)
        RejectedStats = cloner.Clone(original.RejectedStats);
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicRegressionSingleObjectiveOsgaEvaluator(this, cloner);
    }

    /// <summary>Clears the base state and resets all collected statistics.</summary>
    public override void ClearState() {
      base.ClearState();
      RejectedStats = new IntMatrix();
      TotalStats = new IntMatrix();
      AdjustedEvaluatedSolutions = 0;
    }

    /// <summary>
    /// Evaluates the current tree and stores the result in the quality parameter. The early-stopping
    /// evaluation is used when parent qualities are available, the plain evaluation otherwise.
    /// </summary>
    public override IOperation InstrumentedApply() {
      var solution = SymbolicExpressionTreeParameter.ActualValue;
      IEnumerable<int> rows = GenerateRowsToEvaluate();

      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var estimationLimits = EstimationLimitsParameter.ActualValue;
      var problemData = ProblemDataParameter.ActualValue;
      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

      double quality;
      var parentQualities = ParentQualitiesParameter.ActualValue;

      // parent subscopes are not present during evaluation of the initial population
      if (parentQualities.Length > 0) {
        quality = Calculate(interpreter, solution, estimationLimits, problemData, rows);
      } else {
        quality = Calculate(interpreter, solution, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
      }
      QualityParameter.ActualValue = new DoubleValue(quality);

      return base.InstrumentedApply();
    }

    /// <summary>
    /// Computes the squared Pearson's R between the target values and the (optionally linearly
    /// scaled, range-limited) estimated values of <paramref name="solution"/> on <paramref name="rows"/>.
    /// </summary>
    /// <returns>R squared, or <see cref="double.NaN"/> if the calculator reports an error.</returns>
    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
      OnlineCalculatorError errorState;

      double r;
      if (applyLinearScaling) {
        var rCalculator = new OnlinePearsonsRCalculator();
        CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, rCalculator, problemData.Dataset.Rows);
        errorState = rCalculator.ErrorState;
        r = rCalculator.R;
      } else {
        IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
        r = OnlinePearsonsRCalculator.Calculate(targetValues, boundedEstimatedValues, out errorState);
      }
      if (errorState != OnlineCalculatorError.None) return double.NaN;
      return r * r;
    }

    /// <summary>
    /// Early-stopping evaluation. The reference quality is interpolated between the worst and the best
    /// parent quality using the comparison factor; the rows are then processed incrementally and the
    /// evaluation is aborted once an intermediate quality is not greater than the applicable limit.
    /// </summary>
    /// <returns>The final or the intermediate (at the point of rejection) squared Pearson's R.</returns>
    private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows) {
      var lowerEstimationLimit = estimationLimits.Lower;
      var upperEstimationLimit = estimationLimits.Upper;
      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows).LimitToRange(lowerEstimationLimit, upperEstimationLimit);
      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToList();
      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value);
      var minQuality = double.MaxValue;
      var maxQuality = double.MinValue;

      foreach (var quality in parentQualities) {
        if (minQuality > quality) minQuality = quality;
        if (maxQuality < quality) maxQuality = quality;
      }

      var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;

      if (UseFixedEvaluationIntervals) {
        #region fixed intervals
        double threshold = parentQuality * RelativeParentChildQualityThreshold;

        if (UseAdaptiveQualityThreshold) {
          var actualSelectionPressure = ActualSelectionPressureParameter.ActualValue;
          if (actualSelectionPressure != null)
            threshold = parentQuality * (1 - actualSelectionPressure.Value / 100.0);
        }

        var rcalc = new OnlinePearsonsRCalculator();
        var trainingPartitionSize = problemData.TrainingPartition.Size;
        // Clamp to 1: a floored interval of 0 would make "calculatedRows % interval" throw DivideByZeroException.
        var interval = Math.Max(1, (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize));

        var calculatedRows = 0;

        // Typed as IEnumerator<double> so both enumerators are reference-typed and get disposed on every exit path.
        using (IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator())
        using (IEnumerator<double> targetEnumerator = targetValues.GetEnumerator()) {
          if (AggregateStatistics) {
            #region aggregate statistics
            var trainingEnd = problemData.TrainingPartition.End;

            double quality = 0;
            int intervalCount = 0, rejectionInterval = 0;
            var predictedRejected = false;

            // All rows are processed even after a predicted rejection, so the actual quality is known afterwards.
            // The non-short-circuit '&' advances both enumerators on every iteration.
            while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
              var estimated = estimatedEnumerator.Current;
              var target = targetEnumerator.Current;
              rcalc.Add(target, estimated);
              ++calculatedRows;
              if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
                intervalCount++;
                if (predictedRejected) continue;
                var r = rcalc.ErrorState == OnlineCalculatorError.None ? rcalc.R : 0d;
                quality = r * r;
                // Written as !(a > b) so that a NaN quality also counts as rejected.
                if (!(quality > threshold)) {
                  rejectionInterval = intervalCount - 1;
                  predictedRejected = true;
                }
              }
            }
            var actualQuality = rcalc.ErrorState == OnlineCalculatorError.None ? rcalc.R : 0d;
            actualQuality *= actualQuality;

            if (!predictedRejected) quality = actualQuality;

            var actuallyRejected = !(actualQuality > parentQuality);

            // Lazily create the statistics matrices on first use.
            if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
              RejectedStats = new IntMatrix(2, intervalCount + 1);
              RejectedStats.RowNames = new[] { "Predicted", "Actual" };
              RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
              TotalStats = new IntMatrix(2, 1);
              TotalStats.RowNames = new[] { "Predicted", "Actual" };
              TotalStats.ColumnNames = new[] { "Rejected" };
            }

            if (actuallyRejected) {
              TotalStats[0, 0]++; // prediction true
              TotalStats[1, 0]++;
              RejectedStats[0, rejectionInterval]++;
              RejectedStats[1, rejectionInterval]++;
            } else {
              if (predictedRejected) {
                RejectedStats[0, rejectionInterval]++;
                TotalStats[0, 0]++;
              }
            }
            return quality;
            #endregion
          } else {
            while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
              rcalc.Add(targetEnumerator.Current, estimatedEnumerator.Current);
              ++calculatedRows;
              if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
                var q = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
                var quality = q * q;
                // !(a > b) is also true for NaN, so a calculator error aborts the evaluation here.
                if (!(quality > threshold)) {
                  AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
                  return quality;
                }
              }
            }
            var r = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
            var actualQuality = r * r;
            AdjustedEvaluatedSolutions += 1d;
            return actualQuality;
          }
        }
        #endregion
      } else {
        var lsc = new OnlineLinearScalingParameterCalculator();
        var rcalc = new OnlinePearsonsRCalculator();
        // Clamp to 1: a rounded interval of 0 would make "calculatedRows % interval" throw DivideByZeroException.
        var interval = Math.Max(1, (int)Math.Round(RelativeFitnessEvaluationIntervalSize * problemData.TrainingPartition.Size));
        var quality = 0d;
        var calculatedRows = 0;

        // The estimated values are only kept when the final quality has to be recomputed from them below.
        var cache = PreserveResultCompatibility ? new List<double>(problemData.TrainingPartition.Size) : null;
        foreach (var target in estimatedValues.Zip(targetValues, (e, t) => new { EstimatedValue = e, ActualValue = t })) {
          if (cache != null)
            cache.Add(target.EstimatedValue);

          lsc.Add(target.EstimatedValue, target.ActualValue);
          rcalc.Add(target.EstimatedValue, target.ActualValue);

          calculatedRows++;

          if (calculatedRows % interval != 0) continue;

          var alpha = lsc.Alpha;
          var beta = lsc.Beta;
          if (lsc.ErrorState != OnlineCalculatorError.None) {
            alpha = 0;
            beta = 1;
          }

          // Complete a copy of the calculator with the not yet evaluated rows, pairing each remaining
          // target with its inversely scaled value (presumably an optimistic estimate of the final quality).
          var calc = (OnlinePearsonsRCalculator)rcalc.Clone();
          foreach (var t in targetValues.Skip(calculatedRows)) {
            var s = (t - alpha) / beta; // scaled target
            calc.Add(s, t); // add pair (scaled, target) to the calculator
          }
          var r = calc.ErrorState == OnlineCalculatorError.None ? calc.R : 0d;
          quality = r * r;

          if (!(quality > parentQuality)) {
            AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
            return quality;
          }
        }
        if (PreserveResultCompatibility) {
          // get quality for all the rows. to ensure reproducibility of results between this evaluator
          // and the standard one, we calculate the quality in an identical way (otherwise the returned
          // quality could be slightly off due to rounding errors (in the range 1e-15 to 1e-16)
          var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
          double r;
          OnlineCalculatorError calculatorError;

          if (applyLinearScaling) {
            var alpha = lsc.Alpha;
            var beta = lsc.Beta;
            if (lsc.ErrorState != OnlineCalculatorError.None) {
              alpha = 0;
              beta = 1;
            }
            var boundedEstimatedValues = cache.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
          } else {
            var boundedEstimatedValues = cache.LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
          }
          quality = calculatorError == OnlineCalculatorError.None ? r * r : 0d;
        }
        AdjustedEvaluatedSolutions++;
        return quality;
      }
    }

    /// <summary>
    /// Evaluates <paramref name="tree"/> on <paramref name="rows"/> with the plain (non early-stopping)
    /// calculation, resolving the required parameters in the given execution <paramref name="context"/>.
    /// </summary>
    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
      EstimationLimitsParameter.ExecutionContext = context;
      ApplyLinearScalingParameter.ExecutionContext = context;

      try {
        var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
        var estimationLimits = EstimationLimitsParameter.ActualValue;
        var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

        return Calculate(interpreter, tree, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
      } finally {
        // Reset the contexts even if the evaluation throws, so no foreign context stays attached.
        SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
        EstimationLimitsParameter.ExecutionContext = null;
        ApplyLinearScalingParameter.ExecutionContext = null;
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.