Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs @ 14104

Last change on this file since 14104 was 14104, checked in by bburlacu, 6 years ago

#2635: Improve performance and accuracy of evaluator and analyzer.

File size: 13.9 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
  /// <summary>
  /// Symbolic regression evaluator that uses the squared Pearson correlation coefficient (R²)
  /// as quality and is meant to be used together with offspring selection.
  /// </summary>
  /// <remarks>
  /// When parent qualities are available, the evaluator additionally computes the running R² after
  /// each evaluation interval and checks whether these partial qualities would have predicted that
  /// the child is rejected. The evaluation itself is never aborted early: the prediction is only
  /// compared with the actual outcome and counted in <see cref="RejectedStats"/> and <see cref="TotalStats"/>.
  /// NOTE(review): both matrices are updated without any synchronization - confirm that this
  /// evaluator instance is not applied from several threads at once.
  /// </remarks>
  [Item("SymbolicRegressionSingleObjectiveOSGAEvaluator", "An evaluator which tries to predict when a child will not be able to fullfil offspring selection criteria, to save evaluation time.")]
  [StorableClass]
  public class SymbolicRegressionSingleObjectiveOsgaEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
    private const string RelativeParentChildQualityThresholdParameterName = "RelativeParentChildQualityThreshold";
    private const string RelativeFitnessEvaluationIntervalSizeParameterName = "RelativeFitnessEvaluationIntervalSize";
    private const string ResultCollectionParameterName = "Results";
    private const string ComparisonFactorParameterName = "ComparisonFactor";
    private const string ParentQualitiesParameterName = "ParentQualities";
    private const string RejectedStatsParameterName = "RejectedStats";
    private const string TotalStatsParameterName = "TotalStats";

    #region parameters
    public ILookupParameter<ResultCollection> ResultCollectionParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
    }

    public IValueParameter<IntMatrix> RejectedStatsParameter {
      get { return (IValueParameter<IntMatrix>)Parameters[RejectedStatsParameterName]; }
    }
    // The property name is historical; it exposes the parameter registered as "TotalStats".
    public IValueParameter<IntMatrix> NotRejectedStatsParameter {
      get { return (IValueParameter<IntMatrix>)Parameters[TotalStatsParameterName]; }
    }
    public IValueLookupParameter<DoubleValue> ComparisonFactorParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[ComparisonFactorParameterName]; }
    }
    public IFixedValueParameter<PercentValue> RelativeParentChildQualityThresholdParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeParentChildQualityThresholdParameterName]; }
    }
    public IFixedValueParameter<PercentValue> RelativeFitnessEvaluationIntervalSizeParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[RelativeFitnessEvaluationIntervalSizeParameterName]; }
    }
    public IScopeTreeLookupParameter<DoubleValue> ParentQualitiesParameter {
      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[ParentQualitiesParameterName]; }
    }
    #endregion

    #region parameter properties
    /// <summary>Factor applied to the interpolated parent quality to obtain the threshold for the partial qualities.</summary>
    public double RelativeParentChildQualityThreshold {
      get { return RelativeParentChildQualityThresholdParameter.Value.Value; }
      set { RelativeParentChildQualityThresholdParameter.Value.Value = value; }
    }

    /// <summary>Size of one evaluation interval as a fraction of the evaluated rows.</summary>
    public double RelativeFitnessEvaluationIntervalSize {
      get { return RelativeFitnessEvaluationIntervalSizeParameter.Value.Value; }
      set { RelativeFitnessEvaluationIntervalSizeParameter.Value.Value = value; }
    }

    /// <summary>
    /// 2 x (number of intervals) matrix. Row "Predicted" counts, per interval, how often that interval
    /// was the first one to fail the threshold; row "Actual" counts how many of those children
    /// were also actually rejected.
    /// </summary>
    public IntMatrix RejectedStats {
      get { return RejectedStatsParameter.Value; }
      set { RejectedStatsParameter.Value = value; }
    }

    /// <summary>2 x 2 matrix counting predicted/actual (rows) rejected/not rejected (columns) children.</summary>
    public IntMatrix TotalStats {
      get { return NotRejectedStatsParameter.Value; }
      set { NotRejectedStatsParameter.Value = value; }
    }
    #endregion

    public override bool Maximization {
      get { return true; }
    }

    public SymbolicRegressionSingleObjectiveOsgaEvaluator() {
      Parameters.Add(new ValueLookupParameter<DoubleValue>(ComparisonFactorParameterName, "Determines if the quality should be compared to the better parent (1.0), to the worse (0.0) or to any linearly interpolated value between them."));
      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeParentChildQualityThresholdParameterName, new PercentValue(0.9)));
      Parameters.Add(new FixedValueParameter<PercentValue>(RelativeFitnessEvaluationIntervalSizeParameterName, new PercentValue(0.1)));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(ParentQualitiesParameterName) { ActualName = "Quality" });
      Parameters.Add(new ValueParameter<IntMatrix>(RejectedStatsParameterName, new IntMatrix()));
      Parameters.Add(new ValueParameter<IntMatrix>(TotalStatsParameterName, new IntMatrix()));
    }

    // Adds parameters that are missing after deserialization
    // (presumably instances persisted by an earlier version of this class - TODO confirm).
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(ResultCollectionParameterName))
        Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));

      if (!Parameters.ContainsKey(ParentQualitiesParameterName))
        Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(ParentQualitiesParameterName) { ActualName = "Quality" });

      if (!Parameters.ContainsKey(RejectedStatsParameterName))
        Parameters.Add(new ValueParameter<IntMatrix>(RejectedStatsParameterName, new IntMatrix()));

      if (!Parameters.ContainsKey(TotalStatsParameterName))
        Parameters.Add(new ValueParameter<IntMatrix>(TotalStatsParameterName, new IntMatrix()));
    }

    [StorableConstructor]
    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(bool deserializing) : base(deserializing) { }

    protected SymbolicRegressionSingleObjectiveOsgaEvaluator(SymbolicRegressionSingleObjectiveOsgaEvaluator original, Cloner cloner) : base(original, cloner) { }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicRegressionSingleObjectiveOsgaEvaluator(this, cloner);
    }

    /// <summary>Clears the base state and replaces both statistics matrices with empty ones.</summary>
    public override void ClearState() {
      base.ClearState();
      RejectedStats = new IntMatrix();
      TotalStats = new IntMatrix();
    }

    /// <summary>
    /// Evaluates the current tree and stores its R² in the quality parameter. The statistics-gathering
    /// calculation is used whenever parent qualities are available, the plain calculation otherwise.
    /// </summary>
    public override IOperation InstrumentedApply() {
      var solution = SymbolicExpressionTreeParameter.ActualValue;
      IEnumerable<int> rows = GenerateRowsToEvaluate();

      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var estimationLimits = EstimationLimitsParameter.ActualValue;
      var problemData = ProblemDataParameter.ActualValue;
      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

      double quality;
      var parentQualities = ParentQualitiesParameter.ActualValue;

      // parent subscopes are not present during evaluation of the initial population
      if (parentQualities.Length > 0) {
        quality = Calculate(interpreter, solution, estimationLimits, problemData, rows, applyLinearScaling);
      } else {
        quality = Calculate(interpreter, solution, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
      }
      QualityParameter.ActualValue = new DoubleValue(quality);

      return base.InstrumentedApply();
    }

    /// <summary>
    /// Calculates the squared Pearson correlation coefficient between the target values and the
    /// (optionally linearly scaled) estimated values of <paramref name="solution"/> on <paramref name="rows"/>.
    /// </summary>
    /// <returns>R², or <see cref="double.NaN"/> if the calculator reports an error state.</returns>
    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
      IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
      OnlineCalculatorError errorState;

      double r;
      if (applyLinearScaling) {
        var rCalculator = new OnlinePearsonsRCalculator();
        CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, rCalculator, problemData.Dataset.Rows);
        errorState = rCalculator.ErrorState;
        r = rCalculator.R;
      } else {
        IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
        r = OnlinePearsonsRCalculator.Calculate(targetValues, boundedEstimatedValues, out errorState);
      }
      if (errorState != OnlineCalculatorError.None) return double.NaN;
      return r * r;
    }

    /// <summary>
    /// Calculates R² like the static overload, but additionally records the running R² at the end of
    /// every evaluation interval, derives from these partial qualities whether the child would have
    /// been predicted as rejected, and updates the statistics matrices.
    /// </summary>
    /// <remarks>
    /// The interval size and the final checkpoint are derived from the number of rows actually evaluated,
    /// so the bookkeeping is independent of where the evaluated rows start in the dataset.
    /// Must only be called when at least one parent quality is available.
    /// </remarks>
    /// <returns>R² over all evaluated rows (NaN if the calculator reports an error state).</returns>
    private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows).ToList();
      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

      double alpha = 0.0, beta = 1.0;
      if (applyLinearScaling)
        CalculateLinearScalingParameters(targetValues, estimatedValues, out alpha, out beta);

      var scaledEstimatedValues = estimatedValues.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);

      var pearsonRCalculator = new OnlinePearsonsRCalculator();
      int rowCount = estimatedValues.Count;
      // at least one row per interval, otherwise the modulo below would divide by zero
      int interval = Math.Max(1, (int)Math.Floor(rowCount * RelativeFitnessEvaluationIntervalSize));
      var qualityPerInterval = new List<double>();
      int processedRows = 0;

      using (var targetEnumerator = targetValues.GetEnumerator())
      using (var estimatedEnumerator = scaledEstimatedValues.GetEnumerator()) {
        while (targetEnumerator.MoveNext() && estimatedEnumerator.MoveNext()) {
          pearsonRCalculator.Add(targetEnumerator.Current, estimatedEnumerator.Current);
          ++processedRows;
          // checkpoint at every full interval and once more at the very last row
          if (processedRows % interval == 0 || processedRows == rowCount) {
            var q = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
            qualityPerInterval.Add(q * q);
          }
        }
      }

      var r = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
      var actualQuality = r * r;

      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value).ToArray();
      var minQuality = parentQualities.Min();
      var maxQuality = parentQualities.Max();
      var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;
      var threshold = parentQuality * RelativeParentChildQualityThreshold;

      // index of the first interval whose partial quality does not exceed the threshold (-1 if none);
      // the negated comparison also treats NaN as a failure
      int rejectedInterval = -1;
      for (int k = 0; k < qualityPerInterval.Count; k++) {
        var q = qualityPerInterval[k];
        if (double.IsNaN(q) || !(q > threshold)) {
          rejectedInterval = k;
          break;
        }
      }

      var actuallyRejected = !(actualQuality > parentQuality);
      UpdateStatistics(qualityPerInterval.Count, interval, rowCount, rejectedInterval, actuallyRejected);

      return actualQuality;
    }

    // Determines the linear scaling parameters (estimated * beta + alpha) for the given values;
    // falls back to the identity transformation (alpha = 0, beta = 1) if the calculator reports an error.
    private static void CalculateLinearScalingParameters(IEnumerable<double> targetValues, IEnumerable<double> estimatedValues, out double alpha, out double beta) {
      alpha = 0.0;
      beta = 1.0;

      var linearScalingCalculator = new OnlineLinearScalingParameterCalculator();
      using (var targetEnumerator = targetValues.GetEnumerator())
      using (var estimatedEnumerator = estimatedValues.GetEnumerator()) {
        // non-short-circuiting '&' on purpose: both enumerators have to advance in lock-step,
        // otherwise the length check after the loop would be off by one
        while (targetEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
          double target = targetEnumerator.Current;
          double estimated = estimatedEnumerator.Current;
          if (!double.IsNaN(estimated) && !double.IsInfinity(estimated))
            linearScalingCalculator.Add(estimated, target);
        }
        if (linearScalingCalculator.ErrorState == OnlineCalculatorError.None && (targetEnumerator.MoveNext() || estimatedEnumerator.MoveNext()))
          throw new ArgumentException("Number of elements in target and estimated values enumeration do not match.");
      }

      if (linearScalingCalculator.ErrorState == OnlineCalculatorError.None) {
        alpha = linearScalingCalculator.Alpha;
        beta = linearScalingCalculator.Beta;
      }
    }

    // Counts one evaluated child in the statistics matrices. rejectedInterval is the index of the first
    // interval that failed the threshold, or -1 if the child was not predicted as rejected.
    private void UpdateStatistics(int intervalCount, int interval, int rowCount, int rejectedInterval, bool actuallyRejected) {
      // (re-)create the matrices when they are empty or when the interval layout has changed,
      // otherwise indexing with rejectedInterval could run out of range
      if (RejectedStats.Rows == 0 || TotalStats.Rows == 0 || RejectedStats.Columns != intervalCount) {
        RejectedStats = new IntMatrix(2, intervalCount);
        RejectedStats.RowNames = new[] { "Predicted", "Actual" };
        RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(rowCount, x * interval)));
        TotalStats = new IntMatrix(2, 2);
        TotalStats.RowNames = new[] { "Predicted", "Actual" };
        TotalStats.ColumnNames = new[] { "Rejected", "Not Rejected" };
      }

      bool predictedRejected = rejectedInterval >= 0;

      if (predictedRejected) {
        RejectedStats[0, rejectedInterval]++;
        TotalStats[0, 0]++;
      } else {
        TotalStats[0, 1]++;
      }

      if (actuallyRejected) {
        TotalStats[1, 0]++;
      } else {
        TotalStats[1, 1]++;
      }

      if (predictedRejected && actuallyRejected) {
        RejectedStats[1, rejectedInterval]++;
      }
    }

    /// <summary>
    /// Calculates R² of <paramref name="tree"/> on <paramref name="rows"/> using the interpreter, estimation
    /// limits and scaling setting looked up in <paramref name="context"/>. No statistics are gathered.
    /// </summary>
    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
      EstimationLimitsParameter.ExecutionContext = context;
      ApplyLinearScalingParameter.ExecutionContext = context;

      try {
        var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
        var estimationLimits = EstimationLimitsParameter.ActualValue;
        var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

        return Calculate(interpreter, tree, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
      } finally {
        // always detach the context again, even if the calculation throws
        SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
        EstimationLimitsParameter.ExecutionContext = null;
        ApplyLinearScalingParameter.ExecutionContext = null;
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.