Free cookie consent management tool by TermsFeed Policy Generator

source: branches/1772_HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisUsefulGenesAnalyzer.cs @ 16132

Last change on this file since 16132 was 13482, checked in by bburlacu, 9 years ago

#1772: Merged trunk changes

File size: 12.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Analysis;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.Operators;
31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Analyzers {
  /// <summary>
  /// Analyzer that replaces an individual by one of its own subtrees when that subtree reaches a
  /// higher squared Pearson correlation with the target variable than the individual's stored
  /// quality. Statistics about the replacements are appended to the result collection.
  /// </summary>
  /// <remarks>
  /// The problem data is cast to <see cref="IRegressionProblemData"/> and the interpreter to
  /// <see cref="SymbolicDataAnalysisExpressionTreeLinearInterpreter"/>; any other configuration
  /// fails with an <see cref="InvalidCastException"/>.
  /// Candidates are taken from the end of the quality-sorted population, i.e. larger quality
  /// values are treated as better, and the stored quality is compared directly against R².
  /// </remarks>
  [StorableClass]
  [Item("SymbolicDataAnalysisUsefulGenesAnalyzer", "An analyzer which performs pruning by promoting genes in the population that outperform their containing individuals (the individuals are replaced by their subparts).")]
  public class SymbolicDataAnalysisUsefulGenesAnalyzer : SingleSuccessorOperator, ISymbolicDataAnalysisAnalyzer {
    // Parameter names (keys into the Parameters collection).
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string QualityParameterName = "Quality";
    private const string ResultCollectionParameterName = "Results";
    private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string GenerationsParameterName = "Generations";
    private const string UpdateCounterParameterName = "UpdateCounter";
    private const string UpdateIntervalParameterName = "UpdateInterval";
    private const string MinimumGenerationsParameterName = "MinimumGenerations";
    private const string PruningProbabilityParameterName = "PruningProbability";
    private const string PercentageOfBestSolutionsParameterName = "PercentageOfBestSolutions";
    // Result names (used both as result key and as data table / data row name).
    private const string PromotedSubtreesResultName = "Promoted subtrees";
    private const string AverageQualityImprovementResultName = "Average quality improvement";
    private const string AverageLengthReductionResultName = "Average length reduction";
    private const string RandomParameterName = "Random";

    #region Parameters
    /// <summary>The trees of the population that are candidates for pruning.</summary>
    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }

    /// <summary>The qualities belonging to the trees; updated in place when a tree is replaced.</summary>
    public IScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }

    /// <summary>Random number generator used to decide whether a candidate is pruned.</summary>
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }

    /// <summary>The result collection that receives the analyzer's data tables.</summary>
    public ILookupParameter<ResultCollection> ResultCollectionParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
    }

    /// <summary>The interpreter used to evaluate subtrees (must be the linear interpreter, see class remarks).</summary>
    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
    }

    /// <summary>The problem data (must be regression problem data, see class remarks).</summary>
    public ILookupParameter<IDataAnalysisProblemData> ProblemDataParameter {
      get { return (ILookupParameter<IDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }

    /// <summary>The current generation count of the algorithm.</summary>
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }

    /// <summary>Counts the calls to <see cref="Apply"/> since the analyzer last ran.</summary>
    public ValueParameter<IntValue> UpdateCounterParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
    }

    /// <summary>The number of calls to <see cref="Apply"/> between two runs of the analyzer.</summary>
    public ValueParameter<IntValue> UpdateIntervalParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    }

    /// <summary>The generation before which the analyzer does nothing.</summary>
    public ValueParameter<IntValue> MinimumGenerationsParameter {
      get { return (ValueParameter<IntValue>)Parameters[MinimumGenerationsParameterName]; }
    }

    /// <summary>The fraction of the best individuals that are considered for pruning.</summary>
    public ValueParameter<PercentValue> PercentageOfBestSolutionsParameter {
      get { return (ValueParameter<PercentValue>)Parameters[PercentageOfBestSolutionsParameterName]; }
    }

    /// <summary>The probability with which a considered individual is actually examined.</summary>
    public ValueParameter<PercentValue> PruningProbabilityParameter {
      get { return (ValueParameter<PercentValue>)Parameters[PruningProbabilityParameterName]; }
    }
    #endregion

    #region Parameter properties
    public int UpdateCounter {
      get { return UpdateCounterParameter.Value.Value; }
      set { UpdateCounterParameter.Value.Value = value; }
    }

    public int UpdateInterval {
      get { return UpdateIntervalParameter.Value.Value; }
      set { UpdateIntervalParameter.Value.Value = value; }
    }

    public int MinimumGenerations {
      get { return MinimumGenerationsParameter.Value.Value; }
      set { MinimumGenerationsParameter.Value.Value = value; }
    }

    public double PercentageOfBestSolutions {
      get { return PercentageOfBestSolutionsParameter.Value.Value; }
      set { PercentageOfBestSolutionsParameter.Value.Value = value; }
    }

    public double PruningProbability {
      get { return PruningProbabilityParameter.Value.Value; }
      set { PruningProbabilityParameter.Value.Value = value; }
    }
    #endregion

    /// <summary>
    /// Creates the analyzer with its default settings: update interval 1, minimum generations 50,
    /// 100% of the best solutions considered and a pruning probability of 10%.
    /// </summary>
    public SymbolicDataAnalysisUsefulGenesAnalyzer() {
      #region Add parameters
      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName));
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
      Parameters.Add(new LookupParameter<IDataAnalysisProblemData>(ProblemDataParameterName));
      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName));
      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, new IntValue(0)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, new IntValue(1)));
      Parameters.Add(new ValueParameter<IntValue>(MinimumGenerationsParameterName, "The minimum number of generations the algorithm must be let to evolve before applying this analyzer.", new IntValue(50)));
      Parameters.Add(new ValueParameter<PercentValue>(PercentageOfBestSolutionsParameterName, "How many of the best individuals should be pruned using this method.", new PercentValue(1.0)));
      Parameters.Add(new ValueParameter<PercentValue>(PruningProbabilityParameterName, "The probability to apply pruning", new PercentValue(0.1)));
      #endregion
    }

    /// <summary>Cloning constructor.</summary>
    protected SymbolicDataAnalysisUsefulGenesAnalyzer(SymbolicDataAnalysisUsefulGenesAnalyzer original, Cloner cloner)
      : base(original, cloner) { }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicDataAnalysisUsefulGenesAnalyzer(this, cloner);
    }

    /// <summary>Constructor used by the persistence layer.</summary>
    [StorableConstructor]
    protected SymbolicDataAnalysisUsefulGenesAnalyzer(bool deserializing)
      : base(deserializing) {
    }

    /// <summary>This analyzer is not enabled by default.</summary>
    public bool EnabledByDefault {
      get { return false; }
    }

    /// <summary>Resets the update counter before delegating to the base implementation.</summary>
    public override void InitializeState() {
      UpdateCounter = 0;
      base.InitializeState();
    }

    /// <summary>
    /// Runs one analysis step. Once the minimum number of generations is reached and the update
    /// interval has elapsed, the best individuals are visited (each with probability
    /// <see cref="PruningProbability"/>); an individual is replaced by the first of its subtrees
    /// (prefix order, excluding the branch root itself) whose R² exceeds the individual's quality.
    /// The number of replacements, the average quality improvement and the average length
    /// reduction are appended to their data tables in the result collection.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      int generations = GenerationsParameter.ActualValue.Value;
      #region Update counter & update interval
      if (generations < MinimumGenerations)
        return base.Apply();
      UpdateCounter++;
      // '<' rather than '!=': if the interval is lowered below the current counter value
      // an inequality test would never match again and the analyzer would stay silent.
      if (UpdateCounter < UpdateInterval) {
        return base.Apply();
      }
      UpdateCounter = 0;
      #endregion

      var trees = SymbolicExpressionTreeParameter.ActualValue.ToArray();
      var qualities = QualityParameter.ActualValue.ToArray();

      Array.Sort(qualities, trees); // sort trees array based on qualities (ascending, best individuals last)

      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var problemData = (IRegressionProblemData)ProblemDataParameter.ActualValue;
      var rows = problemData.TrainingIndices.ToList();
      var random = RandomParameter.ActualValue;

      int replacedTrees = 0;
      int totalLengthReduction = 0;
      double totalQualityImprovement = 0;

      // Clamp so that a percentage outside [0, 1] cannot push the loop index out of the array.
      var count = (int)Math.Floor(trees.Length * PercentageOfBestSolutions);
      count = Math.Max(0, Math.Min(trees.Length, count));

      for (int i = trees.Length - 1; i >= trees.Length - count; --i) {
        if (random.NextDouble() > PruningProbability) continue;
        var tree = trees[i];
        var quality = qualities[i].Value;
        var startNode = tree.Root.GetSubtree(0);
        var root = startNode.GetSubtree(0);

        foreach (var s in root.IterateNodesPrefix().Skip(1)) {
          var r2 = EvaluateSubtree(s, interpreter, problemData, rows);
          if (double.IsNaN(r2) || r2 <= quality) continue;
          totalQualityImprovement += (r2 - quality);
          // Only the branch below the start node is exchanged, so the reduction is the size
          // difference between the old branch and its replacement. Measuring against tree.Length
          // would also include the nodes above the branch, which stay in the tree
          // (assuming Length counts every node of the tree).
          totalLengthReduction += (root.GetLength() - s.GetLength());
          replacedTrees++;
          // replace tree with its own subtree
          startNode.RemoveSubtree(0);
          startNode.AddSubtree(s);
          // update tree quality
          qualities[i].Value = r2;

          // the tree was modified while iterating over it; stop after the first promotion
          break;
        }
      }

      double avgQualityImprovement = replacedTrees == 0 ? 0 : totalQualityImprovement / replacedTrees;
      int avgLengthReduction = replacedTrees == 0 ? 0 : (int)Math.Round((double)totalLengthReduction / replacedTrees);

      var results = ResultCollectionParameter.ActualValue;
      AddResultValue(results, PromotedSubtreesResultName, replacedTrees);
      AddResultValue(results, AverageQualityImprovementResultName, avgQualityImprovement);
      AddResultValue(results, AverageLengthReductionResultName, avgLengthReduction);

      return base.Apply();
    }

    /// <summary>
    /// Appends <paramref name="value"/> to the data row called <paramref name="name"/> inside the
    /// data table stored under the same name in <paramref name="results"/>. Table, row and result
    /// entry are created when the result does not exist yet.
    /// </summary>
    private static void AddResultValue(ResultCollection results, string name, double value) {
      DataTable table;
      if (results.ContainsKey(name)) {
        table = (DataTable)results[name].Value;
      } else {
        table = new DataTable(name);
        table.Rows.Add(new DataRow(name));
        results.Add(new Result(name, table));
      }
      table.Rows[name].Values.Add(value);
    }

    /// <summary>
    /// Evaluates <paramref name="subtree"/> on the given rows and returns the squared Pearson
    /// correlation between its outputs and the target variable.
    /// </summary>
    /// <param name="subtree">The tree node whose branch is evaluated.</param>
    /// <param name="interpreter">Must be a <see cref="SymbolicDataAnalysisExpressionTreeLinearInterpreter"/>; otherwise an <see cref="InvalidCastException"/> is thrown.</param>
    /// <param name="problemData">Provides the dataset and the name of the target variable.</param>
    /// <param name="rows">The dataset rows used for the evaluation.</param>
    /// <returns>R², or <see cref="double.NaN"/> when the correlation calculator reports an error.</returns>
    private static double EvaluateSubtree(ISymbolicExpressionTreeNode subtree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IRegressionProblemData problemData, List<int> rows) {
      var linearInterpreter = (SymbolicDataAnalysisExpressionTreeLinearInterpreter)interpreter;
      var dataset = problemData.Dataset;

      var targetValues = dataset.GetDoubleValues(problemData.TargetVariable, rows);
      var estimatedValues = linearInterpreter.GetValues(subtree, dataset, rows);

      OnlineCalculatorError error;
      double r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out error);
      return (error == OnlineCalculatorError.None) ? r * r : double.NaN;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.