Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression/3.3/Symbolic/Analyzer/ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer.cs @ 6412

Last change on this file since 6412 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 14.1 KB
Line 
1#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression.Symbolic.Evaluators;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34using HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression.Symbolic.Interfaces;
35using HeuristicLab.Common;
36
namespace HeuristicLab.Problems.DataAnalysis.MultiVariate.Regression.Symbolic.Analyzers {
  /// <summary>
  /// An operator that analyzes the validation best scaled symbolic vector regression solution.
  /// </summary>
  /// <remarks>
  /// On every application the operator evaluates all (optionally linearly scaled) trees on the
  /// validation rows, remembers the best tree seen so far in the "Best solution (validation)" /
  /// "Best solution quality (validation)" lookup parameters and appends the best-so-far and the
  /// current-best quality to the "Validation Quality" data table in the result collection.
  /// </remarks>
  [Item("ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic vector regression solution.")]
  [StorableClass]
  public sealed class ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer : SingleSuccessorOperator, IAnalyzer {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    // NOTE(review): no parameter is registered under this name in this class.
    private const string ScaledSymbolicExpressionTreeParameterName = "ScaledSymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
    private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
    private const string EvaluatorParameterName = "Evaluator";
    private const string MaximizationParameterName = "Maximization";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string AlphaParameterName = "Alpha";
    private const string BetaParameterName = "Beta";
    private const string BestSolutionParameterName = "Best solution (validation)";
    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    private const string BestSolutionQualityValuesParameterName = "Validation Quality";
    private const string ResultsParameterName = "Results";
    private const string BestKnownQualityParameterName = "BestKnownQuality";

    #region parameter properties
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleArray> AlphaParameter {
      get { return (ScopeTreeLookupParameter<DoubleArray>)Parameters[AlphaParameterName]; }
    }
    public ScopeTreeLookupParameter<DoubleArray> BetaParameter {
      get { return (ScopeTreeLookupParameter<DoubleArray>)Parameters[BetaParameterName]; }
    }
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    public IValueLookupParameter<MultiVariateDataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<MultiVariateDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }
    public IValueLookupParameter<ISingleObjectiveSymbolicVectorRegressionEvaluator> EvaluatorParameter {
      get { return (IValueLookupParameter<ISingleObjectiveSymbolicVectorRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
    }
    public IValueLookupParameter<BoolValue> MaximizationParameter {
      get { return (IValueLookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    public IValueLookupParameter<DoubleArray> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleArray>)Parameters[UpperEstimationLimitParameterName]; }
    }
    public IValueLookupParameter<DoubleArray> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleArray>)Parameters[LowerEstimationLimitParameterName]; }
    }
    public ILookupParameter<SymbolicExpressionTree> BestSolutionParameter {
      get { return (ILookupParameter<SymbolicExpressionTree>)Parameters[BestSolutionParameterName]; }
    }
    public ILookupParameter<DoubleValue> BestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    }
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    // NOTE(review): this parameter is registered but never read or written by Apply().
    public ILookupParameter<DoubleValue> BestKnownQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    }
    #endregion
    #region properties
    public MultiVariateDataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    public ItemArray<DoubleArray> Alpha {
      get { return AlphaParameter.ActualValue; }
    }
    public ItemArray<DoubleArray> Beta {
      get { return BetaParameter.ActualValue; }
    }
    public DoubleArray LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    public DoubleArray UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public ISingleObjectiveSymbolicVectorRegressionEvaluator Evaluator {
      get { return EvaluatorParameter.ActualValue; }
    }
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    public DoubleValue BestSolutionQuality {
      get { return BestSolutionQualityParameter.ActualValue; }
    }
    #endregion

    // The class is sealed, so these constructors are private (protected members in a sealed
    // class cannot be used by anyone else and only trigger compiler warning CS0628).
    [StorableConstructor]
    private ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }
    private ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer(ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>
    /// Creates a new analyzer and registers all of its parameters.
    /// </summary>
    public ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleArray>(AlphaParameterName, "The alpha parameter for linear scaling."));
      Parameters.Add(new ScopeTreeLookupParameter<DoubleArray>(BetaParameterName, "The beta parameter for linear scaling."));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<MultiVariateDataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<ISingleObjectiveSymbolicVectorRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
      Parameters.Add(new ValueLookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new ValueLookupParameter<DoubleArray>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleArray>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(BestSolutionParameterName, "The best symbolic regression solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new ValidationBestScaledSymbolicVectorRegressionSolutionAnalyzer(this, cloner);
    }

    /// <summary>
    /// Evaluates all trees on the validation rows, updates the best-so-far solution and quality
    /// and records both the best-so-far and the current-best quality in the results.
    /// </summary>
    /// <remarks>
    /// If no tree wins the quality comparison (no trees at all, or every evaluation yields NaN)
    /// and no best solution has been stored yet, nothing is recorded for this application.
    /// </remarks>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // Resolve every looked-up value once instead of once per tree.
      ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTreeParameter.ActualValue;
      ItemArray<DoubleArray> alpha = Alpha;
      ItemArray<DoubleArray> beta = Beta;
      MultiVariateDataAnalysisProblemData problemData = ProblemData;
      ISingleObjectiveSymbolicVectorRegressionEvaluator evaluator = Evaluator;
      DoubleArray lowerEstimationLimit = LowerEstimationLimit;
      DoubleArray upperEstimationLimit = UpperEstimationLimit;
      bool maximization = Maximization.Value;
      var interpreter = SymbolicExpressionTreeInterpreterParameter.ActualValue;

      // Scale only when there is exactly one alpha AND one beta vector per tree; both arrays are
      // indexed by tree position below, so a mismatch in either one falls back to the raw trees.
      IEnumerable<SymbolicExpressionTree> scaledTrees;
      if (alpha.Length == trees.Length && beta.Length == trees.Length) {
        scaledTrees = from i in Enumerable.Range(0, trees.Length)
                      select SymbolicVectorRegressionSolutionLinearScaler.Scale(trees[i], beta[i].ToArray(), alpha[i].ToArray());
      } else {
        scaledTrees = trees;
      }

      // Materialize the checked target variables once; the query would otherwise be re-run for every tree.
      IEnumerable<string> selectedTargetVariables = (from item in problemData.TargetVariables.CheckedItems
                                                     select item.Value.Value).ToArray();
      int validationStart = ValidationSamplesStartParameter.ActualValue.Value;
      int validationEnd = ValidationSamplesEndParameter.ActualValue.Value;
      // Rows validationStart .. validationEnd - 1 (the end index itself is not included).
      IEnumerable<int> rows = Enumerable.Range(validationStart, validationEnd - validationStart);

      SymbolicExpressionTree bestTree = null;
      double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
      foreach (var tree in scaledTrees) {
        // calculate quality on validation set
        double quality = evaluator.Evaluate(tree, interpreter, problemData, selectedTargetVariables, rows, lowerEstimationLimit, upperEstimationLimit);
        // A NaN quality fails both comparisons and therefore never becomes the best.
        bool better = maximization ? quality > bestQuality : quality < bestQuality;
        if (better) {
          bestQuality = quality;
          bestTree = tree;
        }
      }

      DoubleValue previousBestQuality = BestSolutionQualityParameter.ActualValue;
      if (bestTree == null && previousBestQuality == null) {
        // No tree could be ranked and there is no earlier best: do not publish a null solution
        // with an infinite quality.
        return base.Apply();
      }

      bool newBest = bestTree != null &&
        (previousBestQuality == null ||
         (maximization ? bestQuality > previousBestQuality.Value : bestQuality < previousBestQuality.Value));
      if (newBest) {
        BestSolutionParameter.ActualValue = bestTree;
        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality);
      }
      // previousBestQuality is guaranteed non-null here whenever newBest is false.
      double overallBestQuality = newBest ? bestQuality : previousBestQuality.Value;

      // update results
      var results = ResultsParameter.ActualValue;
      if (!results.ContainsKey(BestSolutionQualityValuesParameterName)) {
        results.Add(new Result(BestSolutionParameterName, BestSolutionParameter.ActualValue));
        results.Add(new Result(BestSolutionQualityValuesParameterName, new DataTable(BestSolutionQualityValuesParameterName, BestSolutionQualityValuesParameterName)));
        results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
        results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
      }
      results[BestSolutionParameterName].Value = BestSolutionParameter.ActualValue;
      results[BestSolutionQualityParameterName].Value = new DoubleValue(overallBestQuality);
      results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestQuality);

      DataTable validationValues = (DataTable)results[BestSolutionQualityValuesParameterName].Value;
      AddValue(validationValues, overallBestQuality, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
      AddValue(validationValues, bestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);

      return base.Apply();
    }

    /// <summary>
    /// Appends <paramref name="data"/> to the row called <paramref name="name"/>, creating the row
    /// (with the given description) and adding it to the table if it does not exist yet.
    /// </summary>
    private static void AddValue(DataTable table, double data, string name, string description) {
      DataRow row;
      table.Rows.TryGetValue(name, out row);
      if (row != null) {
        row.Values.Add(data);
        return;
      }
      // New rows receive their first value before they are attached to the table.
      row = new DataRow(name, description);
      row.Values.Add(data);
      table.Rows.Add(row);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.