Free cookie consent management tool by TermsFeed Policy Generator

source: branches/1772_HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisUsefulGenesAnalyzer.cs @ 17434

Last change on this file since 17434 was 17434, checked in by bburlacu, 5 years ago

#1772: Merge trunk changes and fix all errors and compilation warnings.

File size: 12.4 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using HEAL.Attic;
23using HeuristicLab.Analysis;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using System;
32using System.Collections.Generic;
33using System.Linq;
34
namespace HeuristicLab.Problems.DataAnalysis.Symbolic
{
  /// <summary>
  /// Analyzer that prunes symbolic expression trees by promoting "useful genes": for a share of the
  /// individuals it evaluates every proper subtree of the expression and, as soon as one subtree scores a
  /// higher squared Pearson correlation with the target than the quality stored for the whole individual,
  /// replaces the individual's expression with that subtree and overwrites its quality.
  /// </summary>
  /// <remarks>
  /// The analyzer writes three data tables into the result collection: the number of promoted subtrees,
  /// the average quality improvement and the average length reduction of each run.
  /// It casts the problem data to <see cref="IRegressionProblemData"/> and the interpreter to
  /// <see cref="SymbolicDataAnalysisExpressionTreeLinearInterpreter"/>; other types make those casts throw.
  /// NOTE(review): the stored quality is compared directly against R², so it presumably has to be an
  /// R²-like measure that is maximized - confirm against the configured evaluator.
  /// </remarks>
  [Item("SymbolicDataAnalysisUsefulGenesAnalyzer", "An analyzer which performs pruning by promoting genes in the population that outperform their containing individuals (the individuals are replaced by their subparts).")]
  [StorableType("2CB2C4EE-9360-4C81-B354-926510E0DCD0")]
  public class SymbolicDataAnalysisUsefulGenesAnalyzer : SingleSuccessorOperator, ISymbolicDataAnalysisAnalyzer
  {
    // names under which the parameters are registered in the Parameters collection
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string QualityParameterName = "Quality";
    private const string ResultCollectionParameterName = "Results";
    private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string GenerationsParameterName = "Generations";
    private const string UpdateCounterParameterName = "UpdateCounter";
    private const string UpdateIntervalParameterName = "UpdateInterval";
    private const string MinimumGenerationsParameterName = "MinimumGenerations";
    private const string PruningProbabilityParameterName = "PruningProbability";
    private const string PercentageOfBestSolutionsParameterName = "PercentageOfBestSolutions";
    // names of the result tables (each table contains one row with the same name)
    private const string PromotedSubtreesResultName = "Promoted subtrees";
    private const string AverageQualityImprovementResultName = "Average quality improvement";
    private const string AverageLengthReductionResultName = "Average length reduction";
    private const string RandomParameterName = "Random";

    #region Parameters
    /// <summary>The symbolic expression trees that are candidates for pruning.</summary>
    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }

    /// <summary>The qualities belonging to the trees; overwritten for every tree that gets replaced.</summary>
    public IScopeTreeLookupParameter<DoubleValue> QualityParameter {
      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }

    /// <summary>The random number generator used to decide whether an individual is pruned.</summary>
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }

    /// <summary>The result collection that receives the statistics tables.</summary>
    public ILookupParameter<ResultCollection> ResultCollectionParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
    }

    /// <summary>The interpreter used to evaluate subtrees (must be the linear interpreter, see EvaluateSubtree).</summary>
    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
    }

    /// <summary>The problem data; Apply casts it to <see cref="IRegressionProblemData"/>.</summary>
    public ILookupParameter<IDataAnalysisProblemData> ProblemDataParameter {
      get { return (ILookupParameter<IDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }

    /// <summary>The current generation count, compared against <see cref="MinimumGenerations"/>.</summary>
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }

    /// <summary>Counts the calls of Apply since the analyzer last ran.</summary>
    public ValueParameter<IntValue> UpdateCounterParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
    }

    /// <summary>The number of calls of Apply between two runs of the analyzer.</summary>
    public ValueParameter<IntValue> UpdateIntervalParameter {
      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    }

    /// <summary>The minimum number of generations that must have passed before the analyzer does anything.</summary>
    public ValueParameter<IntValue> MinimumGenerationsParameter {
      get { return (ValueParameter<IntValue>)Parameters[MinimumGenerationsParameterName]; }
    }

    /// <summary>The share of the population (taken from the end of the quality-sorted order) considered for pruning.</summary>
    public ValueParameter<PercentValue> PercentageOfBestSolutionsParameter {
      get { return (ValueParameter<PercentValue>)Parameters[PercentageOfBestSolutionsParameterName]; }
    }

    /// <summary>The probability with which a considered individual is actually pruned.</summary>
    public ValueParameter<PercentValue> PruningProbabilityParameter {
      get { return (ValueParameter<PercentValue>)Parameters[PruningProbabilityParameterName]; }
    }
    #endregion

    #region Parameter properties
    /// <summary>Shortcut for the value of <see cref="UpdateCounterParameter"/>.</summary>
    public int UpdateCounter {
      get { return UpdateCounterParameter.Value.Value; }
      set { UpdateCounterParameter.Value.Value = value; }
    }

    /// <summary>Shortcut for the value of <see cref="UpdateIntervalParameter"/>.</summary>
    public int UpdateInterval {
      get { return UpdateIntervalParameter.Value.Value; }
      set { UpdateIntervalParameter.Value.Value = value; }
    }

    /// <summary>Shortcut for the value of <see cref="MinimumGenerationsParameter"/>.</summary>
    public int MinimumGenerations {
      get { return MinimumGenerationsParameter.Value.Value; }
      set { MinimumGenerationsParameter.Value.Value = value; }
    }

    /// <summary>Shortcut for the value of <see cref="PercentageOfBestSolutionsParameter"/>.</summary>
    public double PercentageOfBestSolutions {
      get { return PercentageOfBestSolutionsParameter.Value.Value; }
      set { PercentageOfBestSolutionsParameter.Value.Value = value; }
    }

    /// <summary>Shortcut for the value of <see cref="PruningProbabilityParameter"/>.</summary>
    public double PruningProbability {
      get { return PruningProbabilityParameter.Value.Value; }
      set { PruningProbabilityParameter.Value.Value = value; }
    }
    #endregion

    /// <summary>
    /// Creates the analyzer and registers its parameters. Defaults: update counter 0, update interval 1,
    /// minimum generations 50, percentage of best solutions 1.0 and pruning probability 0.1.
    /// </summary>
    public SymbolicDataAnalysisUsefulGenesAnalyzer()
    {
      #region Add parameters
      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName));
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
      Parameters.Add(new LookupParameter<IDataAnalysisProblemData>(ProblemDataParameterName));
      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName));
      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, new IntValue(0)));
      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, new IntValue(1)));
      Parameters.Add(new ValueParameter<IntValue>(MinimumGenerationsParameterName, "The minimum number of generations the algorithm must be let to evolve before applying this analyzer.", new IntValue(50)));
      Parameters.Add(new ValueParameter<PercentValue>(PercentageOfBestSolutionsParameterName, "How many of the best individuals should be pruned using this method.", new PercentValue(1.0)));
      Parameters.Add(new ValueParameter<PercentValue>(PruningProbabilityParameterName, "The probability to apply pruning", new PercentValue(0.1)));
      #endregion
    }

    /// <summary>Cloning constructor; all state lives in the parameters, which the base class handles.</summary>
    protected SymbolicDataAnalysisUsefulGenesAnalyzer(SymbolicDataAnalysisUsefulGenesAnalyzer original, Cloner cloner)
      : base(original, cloner) { }

    /// <summary>Creates a deep clone of this analyzer via the cloning constructor.</summary>
    public override IDeepCloneable Clone(Cloner cloner)
    {
      return new SymbolicDataAnalysisUsefulGenesAnalyzer(this, cloner);
    }

    /// <summary>Constructor used by the persistence framework.</summary>
    [StorableConstructor]
    protected SymbolicDataAnalysisUsefulGenesAnalyzer(StorableConstructorFlag _) : base(_) { }

    /// <summary>Always false: the analyzer modifies the population and therefore has to be enabled explicitly.</summary>
    public bool EnabledByDefault {
      get { return false; }
    }

    /// <summary>Resets the update counter to zero before delegating to the base implementation.</summary>
    public override void InitializeState()
    {
      UpdateCounter = 0;
      base.InitializeState();
    }

    /// <summary>
    /// Runs one analysis step. Nothing happens before <see cref="MinimumGenerations"/> is reached or while
    /// the update counter is below <see cref="UpdateInterval"/>. Otherwise the trees are sorted by quality,
    /// the last <c>floor(n * PercentageOfBestSolutions)</c> of them are visited, each one is pruned with
    /// probability <see cref="PruningProbability"/>, and the statistics of the run are appended to the
    /// result tables.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    /// <exception cref="InvalidCastException">
    /// The problem data is not an <see cref="IRegressionProblemData"/>, or (as soon as a subtree is evaluated)
    /// the interpreter is not a <see cref="SymbolicDataAnalysisExpressionTreeLinearInterpreter"/>.
    /// </exception>
    public override IOperation Apply()
    {
      int generations = GenerationsParameter.ActualValue.Value;
      #region Update counter & update interval
      if (generations < MinimumGenerations)
        return base.Apply();
      UpdateCounter++;
      // '<' instead of '!=': if the interval is lowered below the current counter value the analyzer
      // still fires (and resets the counter) instead of counting upwards forever
      if (UpdateCounter < UpdateInterval)
      {
        return base.Apply();
      }
      UpdateCounter = 0;
      #endregion

      var trees = SymbolicExpressionTreeParameter.ActualValue.ToArray();
      var qualities = QualityParameter.ActualValue.ToArray();

      // reorder both arrays by quality; the loop below walks backwards from the end of the arrays
      // NOTE(review): treating the end as "the best" presumes an ascending sort of a maximized quality
      Array.Sort(qualities, trees);

      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var problemData = (IRegressionProblemData)ProblemDataParameter.ActualValue;
      var rows = problemData.TrainingIndices.ToList();
      var random = RandomParameter.ActualValue;

      int replacedTrees = 0;
      int totalLengthReduction = 0;
      double totalQualityImprovement = 0;

      // clamp to [0, trees.Length]: a percentage above 100 % would otherwise drive the loop index below zero
      var count = (int)Math.Floor(trees.Length * PercentageOfBestSolutions);
      count = Math.Max(0, Math.Min(trees.Length, count));

      for (int i = trees.Length - 1; i >= trees.Length - count; --i)
      {
        if (random.NextDouble() > PruningProbability) continue;
        var tree = trees[i];
        var quality = qualities[i].Value;
        // the expression hangs two levels below tree.Root; startNode is the node whose child gets swapped
        var startNode = tree.Root.GetSubtree(0);
        var root = startNode.GetSubtree(0);

        // Skip(1) leaves out the expression root itself, so only proper subtrees are candidates
        foreach (var s in root.IterateNodesPrefix().Skip(1))
        {
          var r2 = EvaluateSubtree(s, interpreter, problemData, rows);
          if (double.IsNaN(r2) || r2 <= quality) continue;
          totalQualityImprovement += (r2 - quality);
          // compare the old expression with the promoted subtree; the nodes above 'root' stay in the
          // tree after the replacement and therefore must not be counted as removed
          totalLengthReduction += (root.GetLength() - s.GetLength());
          replacedTrees++;
          // replace tree with its own subtree
          startNode.RemoveSubtree(0);
          startNode.AddSubtree(s);
          // update tree quality
          qualities[i].Value = r2;

          // only the first improving subtree (in prefix order) is promoted
          break;
        }
      }

      double avgQualityImprovement = replacedTrees == 0 ? 0 : totalQualityImprovement / replacedTrees;
      int avgLengthReduction = replacedTrees == 0 ? 0 : (int)Math.Round((double)totalLengthReduction / replacedTrees);

      var results = ResultCollectionParameter.ActualValue;
      AddResultValue(results, PromotedSubtreesResultName, replacedTrees);
      AddResultValue(results, AverageQualityImprovementResultName, avgQualityImprovement);
      AddResultValue(results, AverageLengthReductionResultName, avgLengthReduction);

      return base.Apply();
    }

    /// <summary>
    /// Appends <paramref name="value"/> to the row <paramref name="name"/> of the data table stored in
    /// <paramref name="results"/> under the same name; table, row and result are created on first use.
    /// </summary>
    private static void AddResultValue(ResultCollection results, string name, double value)
    {
      DataTable table;
      if (results.ContainsKey(name))
      {
        table = (DataTable)results[name].Value;
      }
      else
      {
        table = new DataTable(name);
        table.Rows.Add(new DataRow(name));
        results.Add(new Result(name, table));
      }
      table.Rows[name].Values.Add(value);
    }

    /// <summary>
    /// Evaluates <paramref name="subtree"/> on the given <paramref name="rows"/> and returns the squared
    /// Pearson correlation between its outputs and the target variable.
    /// </summary>
    /// <returns>R², or <see cref="double.NaN"/> if the correlation calculator reports an error.</returns>
    /// <exception cref="InvalidCastException">
    /// <paramref name="interpreter"/> is not a <see cref="SymbolicDataAnalysisExpressionTreeLinearInterpreter"/>.
    /// </exception>
    private static double EvaluateSubtree(ISymbolicExpressionTreeNode subtree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IRegressionProblemData problemData, List<int> rows)
    {
      // evaluating a bare node (instead of a whole tree) is done through the linear interpreter's GetValues
      var linearInterpreter = (SymbolicDataAnalysisExpressionTreeLinearInterpreter)interpreter;
      var dataset = problemData.Dataset;

      var targetValues = dataset.GetDoubleValues(problemData.TargetVariable, rows);
      var estimatedValues = linearInterpreter.GetValues(subtree, dataset, rows);

      OnlineCalculatorError error;
      double r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out error);
      return (error == OnlineCalculatorError.None) ? r * r : double.NaN;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.