source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/Operators/CovariantParsimonyPressure.cs @ 4329

Last change on this file since 4329 was 4329, checked in by gkronber, 12 years ago

Extended covariant parsimony pressure operator to make it possible to apply pressure over the whole run, increasing the avg. tree size gradually and decreasing it when overfitting. #1142

File size: 11.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Linq;
24using alglib;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Operators;
28using HeuristicLab.Optimization;
29using HeuristicLab.Parameters;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
32using System.Collections.Generic;
33using HeuristicLab.Problems.DataAnalysis.Evaluators;
34using HeuristicLab.Analysis;
35
36namespace HeuristicLab.Problems.DataAnalysis.Operators {
37  [Item("Covariant Parsimony Pressure", "Covariant Parsimony Pressure.")]
38  [StorableClass]
39  public class CovariantParsimonyPressure : SingleSuccessorOperator {
40    public IScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
41      get { return (IScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters["SymbolicExpressionTree"]; }
42    }
43    public IScopeTreeLookupParameter<DoubleValue> QualityParameter {
44      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters["Quality"]; }
45    }
46    public IScopeTreeLookupParameter<DoubleValue> AdjustedQualityParameter {
47      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters["AdjustedQuality"]; }
48    }
49    public ILookupParameter<BoolValue> MaximizationParameter {
50      get { return (ILookupParameter<BoolValue>)Parameters["Maximization"]; }
51    }
52    public IValueLookupParameter<DoubleValue> KParameter {
53      get { return (IValueLookupParameter<DoubleValue>)Parameters["K"]; }
54    }
55    public ILookupParameter<DoubleValue> CParameter {
56      get { return (ILookupParameter<DoubleValue>)Parameters["C"]; }
57    }
58    public ILookupParameter<IntValue> GenerationsParameter {
59      get { return (ILookupParameter<IntValue>)Parameters["Generations"]; }
60    }
61    public IValueLookupParameter<IntValue> FirstGenerationParameter {
62      get { return (IValueLookupParameter<IntValue>)Parameters["FirstGenerationParameter"]; }
63    }
64    public IValueLookupParameter<BoolValue> ApplyParsimonyPressureParameter {
65      get { return (IValueLookupParameter<BoolValue>)Parameters["ApplyParsimonyPressure"]; }
66    }
67    public ILookupParameter<DoubleValue> LengthCorrelationParameter {
68      get { return (ILookupParameter<DoubleValue>)Parameters["Correlation(Length, AdjustedFitness)"]; }
69    }
70    public ILookupParameter<DoubleValue> FitnessCorrelationParameter {
71      get { return (ILookupParameter<DoubleValue>)Parameters["Correlation(Fitness, AdjustedFitness)"]; }
72    }
73    public IValueLookupParameter<PercentValue> ComplexityAdaptionParameter {
74      get { return (IValueLookupParameter<PercentValue>)Parameters["ComplexityAdaption"]; }
75    }
76    public IValueLookupParameter<BoolValue> InvertComplexityAdaptionParameter {
77      get { return (IValueLookupParameter<BoolValue>)Parameters["InvertComplexityAdaption"]; }
78    }
79    public IValueLookupParameter<DoubleValue> MinAverageSizeParameter {
80      get { return (IValueLookupParameter<DoubleValue>)Parameters["MinAverageSize"]; }
81    }
82
83    public CovariantParsimonyPressure(bool deserializing) : base(deserializing) { }
84    public CovariantParsimonyPressure()
85      : base() {
86      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>("SymbolicExpressionTree"));
87      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality"));
88      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("AdjustedQuality"));
89      Parameters.Add(new LookupParameter<BoolValue>("Maximization"));
90      Parameters.Add(new ValueLookupParameter<DoubleValue>("K", new DoubleValue(1.0)));
91      Parameters.Add(new LookupParameter<IntValue>("Generations"));
92      Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(1)));
93      Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyParsimonyPressure"));
94      Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-0.01)));
95      Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Length, AdjustedFitness)"));
96      Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Fitness, AdjustedFitness)"));
97      Parameters.Add(new ValueLookupParameter<DoubleValue>("MinAverageSize", new DoubleValue(15)));
98      Parameters.Add(new LookupParameter<DoubleValue>("C"));
99      Parameters.Add(new ValueLookupParameter<BoolValue>("InvertComplexityAdaption"));
100    }
101
102    [StorableHook(Persistence.Default.CompositeSerializers.Storable.HookType.AfterDeserialization)]
103    private void AfterDeserialization() {
104      if (!Parameters.ContainsKey("Maximization"))
105        Parameters.Add(new LookupParameter<BoolValue>("Maximization"));
106      if (!Parameters.ContainsKey("K"))
107        Parameters.Add(new ValueLookupParameter<DoubleValue>("K", new DoubleValue(1.0)));
108      if (!Parameters.ContainsKey("AdjustedQuality")) {
109        Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("AdjustedQuality"));
110      }
111      if (!Parameters.ContainsKey("Generations")) {
112        Parameters.Add(new LookupParameter<IntValue>("Generations"));
113      }
114      if (!Parameters.ContainsKey("FirstGenerationParameter")) {
115        Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(1)));
116      }
117      if (!Parameters.ContainsKey("ApplyParsimonyPressure")) {
118        Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyParsimonyPressure"));
119      }
120      if (!Parameters.ContainsKey("ComplexityAdaption")) {
121        Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-0.01)));
122      }
123      if (!Parameters.ContainsKey("MinAverageSize")) {
124        Parameters.Add(new ValueLookupParameter<DoubleValue>("MinAverageSize", new DoubleValue(15)));
125      }
126      if (!Parameters.ContainsKey("C")) {
127        Parameters.Add(new LookupParameter<DoubleValue>("C"));
128      }
129      if (!Parameters.ContainsKey("InvertComplexityAdaption")) {
130        Parameters.Add(new ValueLookupParameter<BoolValue>("InvertComplexityAdaption"));
131      }
132    }
133
134    public override IOperation Apply() {
135      ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTreeParameter.ActualValue;
136      ItemArray<DoubleValue> qualities = QualityParameter.ActualValue;
137      // always apply Parsimony pressure if overfitting has been detected
138      // otherwise appliy PP only when we are currently overfitting
139      if (GenerationsParameter.ActualValue != null && GenerationsParameter.ActualValue.Value >= FirstGenerationParameter.ActualValue.Value &&
140           ApplyParsimonyPressureParameter.ActualValue.Value == true) {
141        var lengths = from tree in trees
142                      select tree.Size;
143        double k = KParameter.ActualValue.Value;
144
145        // calculate cov(f, l) and cov(l, l^k)
146        OnlineCovarianceEvaluator lengthFitnessCovEvaluator = new OnlineCovarianceEvaluator();
147        OnlineCovarianceEvaluator lengthAdjLengthCovEvaluator = new OnlineCovarianceEvaluator();
148        OnlineMeanAndVarianceCalculator lengthMeanCalculator = new OnlineMeanAndVarianceCalculator();
149        OnlineMeanAndVarianceCalculator fitnessMeanCalculator = new OnlineMeanAndVarianceCalculator();
150        OnlineMeanAndVarianceCalculator adjLengthMeanCalculator = new OnlineMeanAndVarianceCalculator();
151        var lengthEnumerator = lengths.GetEnumerator();
152        var qualityEnumerator = qualities.GetEnumerator();
153        while (lengthEnumerator.MoveNext() & qualityEnumerator.MoveNext()) {
154          double fitness = qualityEnumerator.Current.Value;
155          if (!MaximizationParameter.ActualValue.Value) {
156            // use f = 1 / (1 + quality) for minimization problems
157            fitness = 1.0 / (1.0 + fitness);
158          }
159          lengthFitnessCovEvaluator.Add(lengthEnumerator.Current, fitness);
160          lengthAdjLengthCovEvaluator.Add(lengthEnumerator.Current, Math.Pow(lengthEnumerator.Current, k));
161          lengthMeanCalculator.Add(lengthEnumerator.Current);
162          fitnessMeanCalculator.Add(fitness);
163          adjLengthMeanCalculator.Add(Math.Pow(lengthEnumerator.Current, k));
164        }
165
166        //double sizeAdaption = lengthMeanCalculator.Mean * ComplexityAdaptionParameter.ActualValue.Value;
167        double sizeAdaption = 100.0 * ComplexityAdaptionParameter.ActualValue.Value;
168        if (InvertComplexityAdaptionParameter.ActualValue != null && InvertComplexityAdaptionParameter.ActualValue.Value) {
169          sizeAdaption = -sizeAdaption;
170        }
171        if (lengthMeanCalculator.Mean + sizeAdaption < MinAverageSizeParameter.ActualValue.Value)
172          sizeAdaption = 0.0;
173
174        //            cov(l, f) - (g(t+1) - mu(t)) avgF
175        // c(t) =  --------------------------------------------
176        //           cov(l, l^k) - (g(t+1) - mu(t)) E[l^k]
177        double c = lengthFitnessCovEvaluator.Covariance - sizeAdaption * fitnessMeanCalculator.Mean;
178        c /= lengthAdjLengthCovEvaluator.Covariance - sizeAdaption * adjLengthMeanCalculator.Mean;
179
180        CParameter.ActualValue = new DoubleValue(c);
181
182        // adjust fitness
183        bool maximization = MaximizationParameter.ActualValue.Value;
184
185        lengthEnumerator = lengths.GetEnumerator();
186        qualityEnumerator = qualities.GetEnumerator();
187        int i = 0;
188        ItemArray<DoubleValue> adjQualities = new ItemArray<DoubleValue>(qualities.Length);
189
190        while (lengthEnumerator.MoveNext() & qualityEnumerator.MoveNext()) {
191          adjQualities[i++] = new DoubleValue(qualityEnumerator.Current.Value - c * Math.Pow(lengthEnumerator.Current, k));
192        }
193        AdjustedQualityParameter.ActualValue = adjQualities;
194        double[] lengthArr = lengths.Select(x => (double)x).ToArray<double>();
195
196        double[] adjFitess = (from f in AdjustedQualityParameter.ActualValue
197                              select f.Value).ToArray<double>();
198        double[] fitnessArr = (from f in QualityParameter.ActualValue
199                               let normFit = maximization ? f.Value : 1.0 / (1.0 + f.Value)
200                               select normFit).ToArray<double>();
201
202        LengthCorrelationParameter.ActualValue = new DoubleValue(alglib.correlation.spearmanrankcorrelation(lengthArr, adjFitess, lengthArr.Length));
203        FitnessCorrelationParameter.ActualValue = new DoubleValue(alglib.correlation.spearmanrankcorrelation(fitnessArr, adjFitess, lengthArr.Length));
204
205      } else {
206        CParameter.ActualValue = new DoubleValue(0.0);
207        // adjusted fitness is equal to fitness
208        AdjustedQualityParameter.ActualValue = (ItemArray<DoubleValue>)QualityParameter.ActualValue.Clone();
209        FitnessCorrelationParameter.ActualValue = new DoubleValue(1.0);
210
211        double[] lengths = (from tree in trees
212                            select (double)tree.Size).ToArray<double>();
213
214        double[] fitess = (from f in AdjustedQualityParameter.ActualValue
215                           select f.Value).ToArray<double>();
216
217        LengthCorrelationParameter.ActualValue = new DoubleValue(alglib.correlation.spearmanrankcorrelation(lengths, fitess, lengths.Length));
218      }
219      return base.Apply();
220    }
221  }
222}
Note: See TracBrowser for help on using the repository browser.