Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Classification/HeuristicLab.Problems.DataAnalysis.Classification/3.3/Symbolic/Analyzer/ValidationBestSymbolicClassificationSolutionAnalyzer.cs @ 4366

Last change on this file since 4366 was 4366, checked in by mkommend, 14 years ago

added draft version of classification (ticket #939)

File size: 13.2 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.Collections.Generic;
23using HeuristicLab.Core;
24using HeuristicLab.Data;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26using HeuristicLab.Operators;
27using HeuristicLab.Optimization;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
31using HeuristicLab.Problems.DataAnalysis.Symbolic;
32
namespace HeuristicLab.Problems.DataAnalysis.Classification {
  /// <summary>
  /// Operator that evaluates every symbolic expression tree it is given on rows drawn from
  /// the validation partition and remembers the best tree seen so far. The best solution and
  /// its quality are written to the lookup parameters "BestValidationSolution" and
  /// "BestValidationQuality" and are added to the results collection the first time a best
  /// solution is found.
  /// </summary>
  [Item("ValidationBestSymbolicClassificationSolutionAnalyzer", "An operator that analyzes the validation best symbolic classification solution.")]
  [StorableClass]
  public class ValidationBestSymbolicClassificationSolutionAnalyzer : SingleSuccessorOperator, ISymbolicClassificationAnalyzer {
    // Names under which the parameters are registered in the Parameters collection.
    private const string MaximizationParameterName = "Maximization";
    private const string GenerationsParameterName = "Generations";
    private const string RandomParameterName = "Random";
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";

    private const string ClassificationProblemDataParameterName = "ClassificationProblemData";
    private const string EvaluatorParameterName = "Evaluator";
    // Note: the validation range is looked up under the generic names "SamplesStart"/"SamplesEnd".
    private const string ValidationSamplesStartParameterName = "SamplesStart";
    private const string ValidationSamplesEndParameterName = "SamplesEnd";
    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";

    private const string ResultsParameterName = "Results";
    private const string BestValidationQualityParameterName = "BestValidationQuality";
    private const string BestValidationSolutionParameterName = "BestValidationSolution";

    #region parameter properties
    /// <summary>Parameter holding the direction of optimization.</summary>
    public ILookupParameter<BoolValue> MaximizationParameter {
      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    }
    /// <summary>Parameter holding the number of generations calculated so far.</summary>
    public ILookupParameter<IntValue> GenerationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    }
    /// <summary>Parameter holding the random generator to use.</summary>
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }
    /// <summary>Parameter holding the symbolic expression trees to analyze.</summary>
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>Parameter holding the interpreter used for the analysis of the trees.</summary>
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }

    /// <summary>Parameter holding the problem data for which the trees are solutions.</summary>
    public ILookupParameter<ClassificationProblemData> ClassificationProblemDataParameter {
      get { return (ILookupParameter<ClassificationProblemData>)Parameters[ClassificationProblemDataParameterName]; }
    }
    /// <summary>Parameter holding the evaluator used to score trees on the validation set.</summary>
    public ILookupParameter<ISymbolicClassificationEvaluator> EvaluatorParameter {
      get { return (ILookupParameter<ISymbolicClassificationEvaluator>)Parameters[EvaluatorParameterName]; }
    }
    /// <summary>Parameter holding the first index of the validation partition.</summary>
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    /// <summary>Parameter holding the last index of the validation partition.</summary>
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }
    /// <summary>Parameter holding the relative number of validation samples to evaluate.</summary>
    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    }
    /// <summary>Parameter holding the upper estimation limit used during evaluation.</summary>
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    /// <summary>Parameter holding the lower estimation limit used during evaluation.</summary>
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }

    /// <summary>Parameter holding the results collection where analysis values are stored.</summary>
    public ILookupParameter<ResultCollection> ResultsParameter {
      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    }
    /// <summary>Parameter holding the validation quality of the best solution of the run.</summary>
    public ILookupParameter<DoubleValue> BestValidationQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[BestValidationQualityParameterName]; }
    }
    /// <summary>Parameter holding the best solution on the validation data found in the run.</summary>
    public ILookupParameter<SymbolicClassificationSolution> BestValidationSolutionParameter {
      get { return (ILookupParameter<SymbolicClassificationSolution>)Parameters[BestValidationSolutionParameterName]; }
    }
    #endregion
    #region properties
    public BoolValue Maximization {
      get { return MaximizationParameter.ActualValue; }
    }
    public IntValue Generations {
      get { return GenerationsParameter.ActualValue; }
    }
    public IRandom Random {
      get { return RandomParameter.ActualValue; }
    }
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }

    public ClassificationProblemData ClassificationProblemData {
      get { return ClassificationProblemDataParameter.ActualValue; }
    }
    public ISymbolicClassificationEvaluator Evaluator {
      get { return EvaluatorParameter.ActualValue; }
    }
    // The misspelled name ("Validiation") is kept on purpose: it is part of the public interface.
    public IntValue ValidiationSamplesStart {
      get { return ValidationSamplesStartParameter.ActualValue; }
    }
    public IntValue ValidationSamplesEnd {
      get { return ValidationSamplesEndParameter.ActualValue; }
    }
    public PercentValue RelativeNumberOfEvaluatedSamples {
      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    }
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }

    public ResultCollection Results {
      get { return ResultsParameter.ActualValue; }
    }
    public DoubleValue BestValidationQuality {
      get { return BestValidationQualityParameter.ActualValue; }
      protected set { BestValidationQualityParameter.ActualValue = value; }
    }
    public SymbolicClassificationSolution BestValidationSolution {
      get { return BestValidationSolutionParameter.ActualValue; }
      protected set { BestValidationSolutionParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>
    /// Creates the analyzer and registers all of its parameters. Only
    /// "RelativeNumberOfEvaluatedSamples" receives a default value (<c>new PercentValue(1)</c>).
    /// </summary>
    public ValidationBestSymbolicClassificationSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));

      Parameters.Add(new LookupParameter<ClassificationProblemData>(ClassificationProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new LookupParameter<ISymbolicClassificationEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));

      Parameters.Add(new ValueLookupParameter<ResultCollection>(ResultsParameterName, "The results collection where the analysis values should be stored."));
      Parameters.Add(new LookupParameter<DoubleValue>(BestValidationQualityParameterName, "The validation quality of the best solution in the current run."));
      Parameters.Add(new LookupParameter<SymbolicClassificationSolution>(BestValidationSolutionParameterName, "The best solution on the validation data found in the current run."));
    }

    /// <summary>Constructor used by the persistence framework during deserialization.</summary>
    [StorableConstructor]
    private ValidationBestSymbolicClassificationSolutionAnalyzer(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Evaluates all trees on a sample of the validation rows, determines the best one with
    /// respect to the optimization direction, and updates the stored best validation solution
    /// and quality if the new tree is better (or if none has been stored yet).
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTree;
      ClassificationProblemData problemData = ClassificationProblemData;
      string targetVariable = problemData.TargetVariable.Value;

      // select a random subset of rows in the validation set
      int validationStart = ValidiationSamplesStart.Value;
      int validationEnd = ValidationSamplesEnd.Value;
      int seed = Random.Next();
      int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
      if (count == 0) {
        // always evaluate on at least one row
        count = 1;
      }
      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);

      // missing limits mean "unbounded"
      DoubleValue upperLimitValue = UpperEstimationLimit;
      DoubleValue lowerLimitValue = LowerEstimationLimit;
      double upperEstimationLimit = upperLimitValue != null ? upperLimitValue.Value : double.PositiveInfinity;
      double lowerEstimationLimit = lowerLimitValue != null ? lowerLimitValue.Value : double.NegativeInfinity;

      // resolve the remaining parameters once instead of once per tree
      bool maximization = Maximization.Value;
      ISymbolicClassificationEvaluator evaluator = Evaluator;
      ISymbolicExpressionTreeInterpreter interpreter = SymbolicExpressionTreeInterpreter;

      double bestQuality = maximization ? double.NegativeInfinity : double.PositiveInfinity;
      SymbolicExpressionTree bestTree = null;

      foreach (var tree in trees) {
        double quality = evaluator.Evaluate(interpreter, tree,
          lowerEstimationLimit, upperEstimationLimit, problemData.Dataset,
          targetVariable, problemData.SortedClassValues, rows);

        bool improved = maximization ? quality > bestQuality : quality < bestQuality;
        if (improved) {
          bestQuality = quality;
          bestTree = tree;
        }
      }

      // No tree qualified (no trees at all, or no quality compared better than the initial
      // bound, e.g. NaN): there is nothing to build a solution from, so leave the stored
      // best solution untouched instead of creating a model around a null tree.
      if (bestTree == null) {
        return base.Apply();
      }

      // if the best validation tree is better than the current best solution => update
      DoubleValue storedQuality = BestValidationQuality;
      bool newBest =
        storedQuality == null ||
        (maximization ? bestQuality > storedQuality.Value : bestQuality < storedQuality.Value);
      if (newBest) {
        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)interpreter.Clone(),
          bestTree);

        if (BestValidationSolution == null) {
          // Use the same (possibly infinite) limits that were used for the evaluation above;
          // dereferencing the limit parameters here would fail when they are not set.
          var solution = new SymbolicClassificationSolution(problemData, model, lowerEstimationLimit, upperEstimationLimit);
          solution.Name = BestValidationSolutionParameterName;
          solution.Description = "Best solution on validation partition found over the whole run.";
          var qualityValue = new DoubleValue(bestQuality);

          BestValidationSolution = solution;
          BestValidationQuality = qualityValue;
          Results.Add(new Result(BestValidationSolutionParameterName, solution));
          Results.Add(new Result(BestValidationQualityParameterName, qualityValue));
        } else {
          BestValidationSolution.Model = model;
          BestValidationQuality.Value = bestQuality;
        }
      }
      return base.Apply();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.