source: branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs @ 14231

Last change on this file since 14231 was 14231, checked in by bburlacu, 5 years ago

#2635: Add evaluator option to aggregate statistics during evaluation (slower). Improve analyzer.

File size: 14.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
  /// <summary>
  /// Pearson R² evaluator for offspring selection. When parent qualities are available, the
  /// child's R² is checked at regular row intervals against a threshold derived from the parent
  /// qualities, and the evaluation is stopped (or, in statistics mode, marked as "predicted
  /// rejected") as soon as the intermediate quality does not exceed that threshold.
  /// </summary>
  [Item("SymbolicRegressionSingleObjectiveOSGAEvaluator", "An evaluator which tries to predict when a child will not be able to fullfil offspring selection criteria, to save evaluation time.")]
  [StorableClass]
  public class SymbolicRegressionSingleObjectiveOsgaEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
    private const string RelativeParentChildQualityThresholdParameterName = "RelativeParentChildQualityThreshold";
    private const string RelativeFitnessEvaluationIntervalSizeParameterName = "RelativeFitnessEvaluationIntervalSize";
    private const string ResultCollectionParameterName = "Results";
    private const string AggregateStatisticsParameterName = "AggregateStatistics";
    private const string RejectedStatsParameterName = "RejectedStats";
    private const string TotalStatsParameterName = "TotalStats";
    private const string ComparisonFactorParameterName = "ComparisonFactor";
    private const string ParentQualitiesParameterName = "ParentQualities";

    #region parameters
    public ILookupParameter<ResultCollection> ResultCollectionParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
    }
    public IValueParameter<BoolValue> AggregateStatisticsParameter {
      get { return (IValueParameter<BoolValue>)Parameters[AggregateStatisticsParameterName]; }
    }
    public IValueParameter<IntMatrix> RejectedStatsParameter {
      get { return (IValueParameter<IntMatrix>)Parameters[RejectedStatsParameterName]; }
    }
    // NOTE: despite its name this property exposes the "TotalStats" parameter; the name is kept for compatibility with existing callers.
    public IValueParameter<IntMatrix> NotRejectedStatsParameter {
      get { return (IValueParameter<IntMatrix>)Parameters[TotalStatsParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> ComparisonFactorParameter {
      get { return (ValueLookupParameter<DoubleValue>)Parameters[ComparisonFactorParameterName]; }
    }
    public IFixedValueParameter<PercentValue> RelativeParentChildQualityThresholdParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeParentChildQualityThresholdParameterName]; }
    }
    public IFixedValueParameter<PercentValue> RelativeFitnessEvaluationIntervalSizeParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeFitnessEvaluationIntervalSizeParameterName]; }
    }
    public IScopeTreeLookupParameter<DoubleValue> ParentQualitiesParameter {
      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[ParentQualitiesParameterName]; }
    }
    #endregion

    #region parameter properties
    /// <summary>Factor applied to the interpolated parent quality to obtain the rejection threshold.</summary>
    public double RelativeParentChildQualityThreshold {
      get { return RelativeParentChildQualityThresholdParameter.Value.Value; }
      set { RelativeParentChildQualityThresholdParameter.Value.Value = value; }
    }

    /// <summary>Fraction of the training partition size after which the intermediate quality is checked.</summary>
    public double RelativeFitnessEvaluationIntervalSize {
      get { return RelativeFitnessEvaluationIntervalSizeParameter.Value.Value; }
      set { RelativeFitnessEvaluationIntervalSizeParameter.Value.Value = value; }
    }

    /// <summary>Per-interval counts of predicted rejections (row 0) and of those that were also actual rejections (row 1).</summary>
    public IntMatrix RejectedStats {
      get { return RejectedStatsParameter.Value; }
      set { RejectedStatsParameter.Value = value; }
    }

    /// <summary>2x2 counts of predicted/actual (rows) versus rejected/not rejected (columns).</summary>
    public IntMatrix TotalStats {
      get { return NotRejectedStatsParameter.Value; }
      set { NotRejectedStatsParameter.Value = value; }
    }
    #endregion

    public override bool Maximization {
      get { return true; }
    }

    public SymbolicRegressionSingleObjectiveOsgaEvaluator() {
      Parameters.Add(new ValueLookupParameter<DoubleValue>(ComparisonFactorParameterName, "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));
      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeParentChildQualityThresholdParameterName, new PercentValue(0.9)));
      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeFitnessEvaluationIntervalSizeParameterName, new PercentValue(0.1)));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(ParentQualitiesParameterName) { ActualName = "Quality" });
      Parameters.Add(new ValueParameter<IntMatrix>(RejectedStatsParameterName, new IntMatrix()));
      Parameters.Add(new ValueParameter<IntMatrix>(TotalStatsParameterName, new IntMatrix()));
      Parameters.Add(new ValueParameter<BoolValue>(AggregateStatisticsParameterName, new BoolValue(false)));
    }

    /// <summary>
    /// Adds parameters that may be missing from a previously stored parameter collection,
    /// so that the parameter accessors above never hit a missing key.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(ResultCollectionParameterName))
        Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));

      if (!Parameters.ContainsKey(ParentQualitiesParameterName))
        Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(ParentQualitiesParameterName) { ActualName = "Quality" });

      if (!Parameters.ContainsKey(RejectedStatsParameterName))
        Parameters.Add(new ValueParameter<IntMatrix>(RejectedStatsParameterName, new IntMatrix()));

      if (!Parameters.ContainsKey(TotalStatsParameterName))
        Parameters.Add(new ValueParameter<IntMatrix>(TotalStatsParameterName, new IntMatrix()));

      // Calculate reads this parameter unconditionally, so it has to exist after loading as well.
      if (!Parameters.ContainsKey(AggregateStatisticsParameterName))
        Parameters.Add(new ValueParameter<BoolValue>(AggregateStatisticsParameterName, new BoolValue(false)));
    }

    [StorableConstructor]
    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(bool deserializing) : base(deserializing) { }

    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(SymbolicRegressionSingleObjectiveOsgaEvaluator original, Cloner cloner) : base(original, cloner) { }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicRegressionSingleObjectiveOsgaEvaluator(this, cloner);
    }

    /// <summary>Resets both statistics matrices to empty matrices in addition to the base state.</summary>
    public override void ClearState() {
      base.ClearState();
      RejectedStats = new IntMatrix();
      TotalStats = new IntMatrix();
    }

    /// <summary>
    /// Evaluates the current tree and stores the result in the quality parameter. The
    /// threshold-based calculation is used when parent qualities are present, the plain
    /// R² calculation otherwise.
    /// </summary>
    public override IOperation InstrumentedApply() {
      var solution = SymbolicExpressionTreeParameter.ActualValue;
      IEnumerable<int> rows = GenerateRowsToEvaluate();

      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var estimationLimits = EstimationLimitsParameter.ActualValue;
      var problemData = ProblemDataParameter.ActualValue;
      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

      double quality;
      var parentQualities = ParentQualitiesParameter.ActualValue;

      // parent subscopes are not present during evaluation of the initial population
      if (parentQualities.Length > 0) {
        quality = Calculate(interpreter, solution, estimationLimits, problemData, rows, applyLinearScaling);
      } else {
        quality = Calculate(interpreter, solution, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
      }
      QualityParameter.ActualValue = new DoubleValue(quality);

      return base.InstrumentedApply();
    }

    /// <summary>
    /// Calculates the squared Pearson correlation between the target values and the values
    /// estimated by <paramref name="solution"/> on <paramref name="rows"/>.
    /// </summary>
    /// <returns>R², or <see cref="double.NaN"/> if the online calculator reports an error state.</returns>
    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
      OnlineCalculatorError errorState;

      double r;
      if (applyLinearScaling) {
        var rCalculator = new OnlinePearsonsRCalculator();
        CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, rCalculator, problemData.Dataset.Rows);
        errorState = rCalculator.ErrorState;
        r = rCalculator.R;
      } else {
        IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
        r = OnlinePearsonsRCalculator.Calculate(targetValues, boundedEstimatedValues, out errorState);
      }
      if (errorState != OnlineCalculatorError.None) return double.NaN;
      return r * r;
    }

    /// <summary>
    /// Calculates R² incrementally and compares the intermediate quality against
    /// <c>parentQuality * RelativeParentChildQualityThreshold</c> at every check point.
    /// </summary>
    /// <remarks>
    /// <paramref name="applyLinearScaling"/> is accepted for signature symmetry but is not used
    /// by this calculation; the estimated values are only limited to the estimation range.
    /// </remarks>
    /// <returns>
    /// The quality at the first check point that does not exceed the threshold, otherwise the
    /// quality over all evaluated rows. NaN if the online calculator is in an error state.
    /// </returns>
    private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

      // materialize once instead of projecting the looked-up values separately for Min and Max
      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value).ToArray();
      var minQuality = parentQualities.Min();
      var maxQuality = parentQualities.Max();
      var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;
      var threshold = parentQuality * RelativeParentChildQualityThreshold;

      var trainingPartitionSize = problemData.TrainingPartition.Size;
      // at least one row per interval: a floor of zero would make "i % interval" throw a DivideByZeroException
      var interval = Math.Max(1, (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize));

      // dispose the enumerators on every exit path, including the early return on rejection
      using (var targetValuesEnumerator = targetValues.GetEnumerator())
      using (var estimatedValuesEnumerator = estimatedValues.GetEnumerator()) {
        if (AggregateStatisticsParameter.Value.Value)
          return CalculateWithStatistics(targetValuesEnumerator, estimatedValuesEnumerator, interval, trainingPartitionSize, threshold, parentQuality);
        return CalculateWithEarlyTermination(targetValuesEnumerator, estimatedValuesEnumerator, interval, trainingPartitionSize, threshold);
      }
    }

    // Returns the squared correlation accumulated so far, or NaN when the calculator is in an error state.
    private static double SquaredR(OnlinePearsonsRCalculator calculator) {
      if (calculator.ErrorState != OnlineCalculatorError.None) return double.NaN;
      var r = calculator.R;
      return r * r;
    }

    // Consumes rows until a check point yields a quality that does not exceed the threshold and returns that quality;
    // returns the quality over all rows when no check point triggers.
    private static double CalculateWithEarlyTermination(IEnumerator<double> targetValuesEnumerator, IEnumerator<double> estimatedValuesEnumerator, int interval, int trainingPartitionSize, double threshold) {
      var pearsonRCalculator = new OnlinePearsonsRCalculator();
      var i = 0;
      while (targetValuesEnumerator.MoveNext() && estimatedValuesEnumerator.MoveNext()) {
        pearsonRCalculator.Add(targetValuesEnumerator.Current, estimatedValuesEnumerator.Current);
        ++i;
        if (i % interval == 0 || i == trainingPartitionSize) {
          var quality = SquaredR(pearsonRCalculator);
          // negated comparison so that NaN also counts as "not better than the threshold"
          if (!(quality > threshold))
            return quality;
        }
      }
      return SquaredR(pearsonRCalculator);
    }

    // Consumes all rows, records the quality at every check point, updates RejectedStats/TotalStats and returns
    // the quality of the first check point that does not exceed the threshold (or the final quality if there is none).
    private double CalculateWithStatistics(IEnumerator<double> targetValuesEnumerator, IEnumerator<double> estimatedValuesEnumerator, int interval, int trainingPartitionSize, double threshold, double parentQuality) {
      var pearsonRCalculator = new OnlinePearsonsRCalculator();
      var qualityPerInterval = new List<double>();
      var i = 0;
      while (targetValuesEnumerator.MoveNext() && estimatedValuesEnumerator.MoveNext()) {
        pearsonRCalculator.Add(targetValuesEnumerator.Current, estimatedValuesEnumerator.Current);
        ++i;
        if (i % interval == 0 || i == trainingPartitionSize)
          qualityPerInterval.Add(SquaredR(pearsonRCalculator));
      }
      var actualQuality = SquaredR(pearsonRCalculator);

      // index of the first check point at which the early-terminating evaluation would have stopped (-1 if none)
      var rejectedInterval = qualityPerInterval.FindIndex(q => double.IsNaN(q) || !(q > threshold));
      var predictedRejected = rejectedInterval >= 0;
      var quality = predictedRejected ? qualityPerInterval[rejectedInterval] : actualQuality;

      var actuallyRejected = !(actualQuality > parentQuality);

      if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
        RejectedStats = new IntMatrix(2, qualityPerInterval.Count);
        RejectedStats.RowNames = new[] { "Predicted", "Actual" };
        // check points are counted in evaluated rows, so the labels are capped by the partition size (not its end index)
        RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingPartitionSize, x * interval)));
        TotalStats = new IntMatrix(2, 2);
        TotalStats.RowNames = new[] { "Predicted", "Actual" };
        TotalStats.ColumnNames = new[] { "Rejected", "Not Rejected" };
      }

      // gather some statistics
      if (predictedRejected) {
        RejectedStats[0, rejectedInterval]++;
        TotalStats[0, 0]++;
      } else {
        TotalStats[0, 1]++;
      }
      if (actuallyRejected) {
        TotalStats[1, 0]++;
      } else {
        TotalStats[1, 1]++;
      }
      if (predictedRejected && actuallyRejected) {
        RejectedStats[1, rejectedInterval]++;
      }
      return quality;
    }

    /// <summary>
    /// Evaluates <paramref name="tree"/> outside of the operator graph using the plain R²
    /// calculation; the given execution context is only attached for the duration of the call.
    /// </summary>
    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
      EstimationLimitsParameter.ExecutionContext = context;
      ApplyLinearScalingParameter.ExecutionContext = context;

      try {
        var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
        var estimationLimits = EstimationLimitsParameter.ActualValue;
        var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

        return Calculate(interpreter, tree, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
      } finally {
        // detach the context even if the evaluation throws
        SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
        EstimationLimitsParameter.ExecutionContext = null;
        ApplyLinearScalingParameter.ExecutionContext = null;
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.