Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisSingleObjectiveValidationParetoBestSolutionAnalyzer.cs @ 8694

Last change on this file since 8694 was 8213, checked in by bburlacu, 13 years ago

#1772: Performance improvements for the GenealogyGraph. Minor refactoring to VisualGenealogyGraphArc and VisualGenealogyGraphNode classes. Added new functionality to the SymbolicExpressionTreeFragmentsAnalyzer, minor refactoring in the other two analyzers. Refactored View code. Updated project references and plugin dependencies and added HeuristicLab.Problems.DataAnalysis.Symbolic to the branch.

File size: 11.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
34  /// <summary>
35  /// An operator that collects the Pareto-best symbolic data analysis solutions for single objective symbolic data analysis problems.
36  /// </summary>
37  [Item("SymbolicDataAnalysisSingleObjectiveValidationParetoBestSolutionAnalyzer", "An operator that analyzes the Pareto-best symbolic data analysis solution for single objective symbolic data analysis problems.")]
38  [StorableClass]
39  public abstract class SymbolicDataAnalysisSingleObjectiveValidationParetoBestSolutionAnalyzer<S, T, U> : SymbolicDataAnalysisSingleObjectiveValidationAnalyzer<T, U>, ISymbolicDataAnalysisBoundedOperator
40    where S : class, ISymbolicDataAnalysisSolution
41    where T : class, ISymbolicDataAnalysisSingleObjectiveEvaluator<U>
42    where U : class, IDataAnalysisProblemData {
43    private const string ValidationBestSolutionsParameterName = "Best validation solutions";
44    private const string ValidationBestSolutionQualitiesParameterName = "Best validation solution qualities";
45    private const string ComplexityParameterName = "Complexity";
46    private const string EstimationLimitsParameterName = "EstimationLimits";
47
48    public override bool EnabledByDefault {
49      get { return false; }
50    }
51
52    #region parameter properties
53    public ILookupParameter<ItemList<S>> ValidationBestSolutionsParameter {
54      get { return (ILookupParameter<ItemList<S>>)Parameters[ValidationBestSolutionsParameterName]; }
55    }
56    public ILookupParameter<ItemList<DoubleArray>> ValidationBestSolutionQualitiesParameter {
57      get { return (ILookupParameter<ItemList<DoubleArray>>)Parameters[ValidationBestSolutionQualitiesParameterName]; }
58    }
59    public IScopeTreeLookupParameter<DoubleValue> ComplexityParameter {
60      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters[ComplexityParameterName]; }
61    }
62    public IValueLookupParameter<DoubleLimit> EstimationLimitsParameter {
63      get { return (IValueLookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
64    }
65
66    #endregion
67    #region properties
68    public ItemList<S> ValidationBestSolutions {
69      get { return ValidationBestSolutionsParameter.ActualValue; }
70      set { ValidationBestSolutionsParameter.ActualValue = value; }
71    }
72    public ItemList<DoubleArray> ValidationBestSolutionQualities {
73      get { return ValidationBestSolutionQualitiesParameter.ActualValue; }
74      set { ValidationBestSolutionQualitiesParameter.ActualValue = value; }
75    }
76    #endregion
77
78    [StorableConstructor]
79    protected SymbolicDataAnalysisSingleObjectiveValidationParetoBestSolutionAnalyzer(bool deserializing) : base(deserializing) { }
80    protected SymbolicDataAnalysisSingleObjectiveValidationParetoBestSolutionAnalyzer(SymbolicDataAnalysisSingleObjectiveValidationParetoBestSolutionAnalyzer<S, T, U> original, Cloner cloner) : base(original, cloner) { }
81    public SymbolicDataAnalysisSingleObjectiveValidationParetoBestSolutionAnalyzer()
82      : base() {
83      Parameters.Add(new LookupParameter<ItemList<S>>(ValidationBestSolutionsParameterName, "The validation best (Pareto-optimal) symbolic data analysis solutions."));
84      Parameters.Add(new LookupParameter<ItemList<DoubleArray>>(ValidationBestSolutionQualitiesParameterName, "The qualities of the validation best (Pareto-optimal) solutions."));
85      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(ComplexityParameterName, "The complexity of each tree."));
86      Parameters.Add(new ValueLookupParameter<DoubleLimit>(EstimationLimitsParameterName, "The lower and upper limit for the estimated values produced by the symbolic classification model."));
87    }
88
89    public override IOperation Apply() {
90      IEnumerable<int> rows = GenerateRowsToEvaluate();
91      if (!rows.Any()) return base.Apply();
92
93      #region find best tree
94      var evaluator = EvaluatorParameter.ActualValue;
95      var problemData = ProblemDataParameter.ActualValue;
96      ISymbolicExpressionTree[] tree = SymbolicExpressionTree.ToArray();
97
98      // sort is ascending and we take the first n% => order so that best solutions are smallest
99      // sort order is determined by maximization parameter
100      double[] trainingQuality;
101      if (Maximization.Value) {
102        // largest values must be sorted first
103        trainingQuality = Quality.Select(x => -x.Value).ToArray();
104      } else {
105        // smallest values must be sorted first
106        trainingQuality = Quality.Select(x => x.Value).ToArray();
107      }
108
109      int[] treeIndexes = Enumerable.Range(0, tree.Length).ToArray();
110
111      // sort trees by training qualities
112      Array.Sort(trainingQuality, treeIndexes);
113
114      // number of best training solutions to validate (at least 1)
115      int topN = (int)Math.Max(tree.Length * PercentageOfBestSolutionsParameter.ActualValue.Value, 1);
116
117      IExecutionContext childContext = (IExecutionContext)ExecutionContext.CreateChildOperation(evaluator);
118      // evaluate best n training trees on validiation set
119      var qualities = treeIndexes
120        .Select(i => tree[i])
121        .Take(topN)
122        .AsParallel()
123        .Select(t => evaluator.Evaluate(childContext, t, problemData, rows))
124        .ToArray();
125      #endregion
126
127      var results = ResultCollection;
128      // create empty parameter and result values
129      if (ValidationBestSolutions == null) {
130        ValidationBestSolutions = new ItemList<S>();
131        ValidationBestSolutionQualities = new ItemList<DoubleArray>();
132        results.Add(new Result(ValidationBestSolutionQualitiesParameter.Name, ValidationBestSolutionQualitiesParameter.Description, ValidationBestSolutionQualities));
133        results.Add(new Result(ValidationBestSolutionsParameter.Name, ValidationBestSolutionsParameter.Description, ValidationBestSolutions));
134      }
135
136      IList<Tuple<double, double>> validationBestQualities = ValidationBestSolutionQualities
137        .Select(x => Tuple.Create(x[0], x[1]))
138        .ToList();
139
140      #region find best trees
141      IList<int> nonDominatedIndexes = new List<int>();
142
143      List<double> complexities;
144      if (ComplexityParameter.ActualValue != null && ComplexityParameter.ActualValue.Length == trainingQuality.Length) {
145        complexities = ComplexityParameter.ActualValue.Select(x => x.Value).ToList();
146      } else {
147        complexities = tree.Select(t => (double)t.Length).ToList();
148      }
149      List<Tuple<double, double>> fitness = new List<Tuple<double, double>>();
150      for (int i = 0; i < qualities.Length; i++)
151        fitness.Add(Tuple.Create(qualities[i], complexities[treeIndexes[i]]));
152
153      var maximization = Tuple.Create(Maximization.Value, false); // complexity must be minimized
154      List<Tuple<double, double>> newNonDominatedQualities = new List<Tuple<double, double>>();
155      for (int i = 0; i < fitness.Count; i++) {
156        if (IsNonDominated(fitness[i], validationBestQualities, maximization) &&
157          IsNonDominated(fitness[i], newNonDominatedQualities, maximization) &&
158          IsNonDominated(fitness[i], fitness.Skip(i + 1), maximization)) {
159          if (!newNonDominatedQualities.Contains(fitness[i])) {
160            newNonDominatedQualities.Add(fitness[i]);
161            nonDominatedIndexes.Add(i);
162          }
163        }
164      }
165      #endregion
166
167      #region update Pareto-optimal solution archive
168      if (nonDominatedIndexes.Count > 0) {
169        ItemList<DoubleArray> nonDominatedQualities = new ItemList<DoubleArray>();
170        ItemList<S> nonDominatedSolutions = new ItemList<S>();
171        // add all new non-dominated solutions to the archive
172        foreach (var index in nonDominatedIndexes) {
173          S solution = CreateSolution(tree[treeIndexes[index]]);
174          nonDominatedSolutions.Add(solution);
175          nonDominatedQualities.Add(new DoubleArray(new double[] { fitness[index].Item1, fitness[index].Item2 }));
176        }
177        // add old non-dominated solutions only if they are not dominated by one of the new solutions
178        for (int i = 0; i < validationBestQualities.Count; i++) {
179          if (IsNonDominated(validationBestQualities[i], newNonDominatedQualities, maximization)) {
180            if (!newNonDominatedQualities.Contains(validationBestQualities[i])) {
181              nonDominatedSolutions.Add(ValidationBestSolutions[i]);
182              nonDominatedQualities.Add(ValidationBestSolutionQualities[i]);
183            }
184          }
185        }
186
187        // make sure solutions and qualities are ordered in the results
188        var orderedIndexes =
189          nonDominatedSolutions.Select((s, i) => i).OrderBy(i => nonDominatedQualities[i][0]).ToArray();
190
191        var orderedNonDominatedSolutions = new ItemList<S>();
192        var orderedNonDominatedQualities = new ItemList<DoubleArray>();
193        foreach (var i in orderedIndexes) {
194          orderedNonDominatedQualities.Add(nonDominatedQualities[i]);
195          orderedNonDominatedSolutions.Add(nonDominatedSolutions[i]);
196        }
197
198        ValidationBestSolutions = orderedNonDominatedSolutions;
199        ValidationBestSolutionQualities = orderedNonDominatedQualities;
200
201        results[ValidationBestSolutionsParameter.Name].Value = orderedNonDominatedSolutions;
202        results[ValidationBestSolutionQualitiesParameter.Name].Value = orderedNonDominatedQualities;
203      }
204      #endregion
205      return base.Apply();
206    }
207
208    protected abstract S CreateSolution(ISymbolicExpressionTree bestTree);
209
210    private bool IsNonDominated(Tuple<double, double> point, IEnumerable<Tuple<double, double>> points, Tuple<bool, bool> maximization) {
211      return !points.Any(p => IsBetterOrEqual(p.Item1, point.Item1, maximization.Item1) &&
212                             IsBetterOrEqual(p.Item2, point.Item2, maximization.Item2));
213    }
214    private bool IsBetterOrEqual(double lhs, double rhs, bool maximization) {
215      if (maximization) return lhs >= rhs;
216      else return lhs <= rhs;
217    }
218  }
219}
Note: See TracBrowser for help on using the repository browser.