Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionValidationAnalyzer.cs @ 5313

Last change on this file since 5313 was 5197, checked in by gkronber, 14 years ago

Introduced base class for operators that evaluate symbolic regression models on a validation set. #1356

File size: 9.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// A base class for operators that analyze the validation fitness of symbolic regression models.
  /// </summary>
  /// <remarks>
  /// <see cref="Apply"/> evaluates every symbolic expression tree on a randomly sampled subset of the
  /// validation partition and passes the resulting qualities to <see cref="Analyze"/>, which derived
  /// classes implement.
  /// </remarks>
  [Item("SymbolicRegressionValidationAnalyzer", "A base class for operators that analyze the validation fitness of symbolic regression models.")]
  [StorableClass]
  public abstract class SymbolicRegressionValidationAnalyzer : SingleSuccessorOperator {
    private const string RandomParameterName = "Random";
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    // NOTE: the validation partition parameters are registered under the generic names "SamplesStart"/"SamplesEnd".
    private const string ValidationSamplesStartParameterName = "SamplesStart";
    private const string ValidationSamplesEndParameterName = "SamplesEnd";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string EvaluatorParameterName = "Evaluator";
    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";

    #region parameter properties
    /// <summary>The random generator to use.</summary>
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }
    /// <summary>The symbolic expression trees to analyze.</summary>
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>The interpreter that should be used for the analysis of symbolic expression trees.</summary>
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    /// <summary>The evaluator which should be used to evaluate the solution on the validation set.</summary>
    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
    }
    /// <summary>The problem data for which the symbolic expression tree is a solution.</summary>
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    /// <summary>The first index of the validation partition of the data set.</summary>
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    /// <summary>The last index of the validation partition of the data set.</summary>
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }
    /// <summary>
    /// The relative number of samples of the dataset partition, which should be randomly chosen for
    /// evaluation between the start and end index. Defaults to <c>new PercentValue(1)</c>.
    /// </summary>
    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    }

    /// <summary>The upper estimation limit that was set for the evaluation of the symbolic expression trees.</summary>
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    /// <summary>The lower estimation limit that was set for the evaluation of the symbolic expression trees.</summary>
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>Actual value of <see cref="RandomParameter"/>.</summary>
    public IRandom Random {
      get { return RandomParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeParameter"/>.</summary>
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeInterpreterParameter"/>.</summary>
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="EvaluatorParameter"/>.</summary>
    public ISymbolicRegressionEvaluator Evaluator {
      get { return EvaluatorParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="ProblemDataParameter"/>.</summary>
    public DataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    /// <summary>
    /// Actual value of <see cref="ValidationSamplesStartParameter"/>.
    /// The misspelled name is kept because it is part of the public interface.
    /// </summary>
    public IntValue ValidiationSamplesStart {
      get { return ValidationSamplesStartParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="ValidationSamplesEndParameter"/>.</summary>
    public IntValue ValidationSamplesEnd {
      get { return ValidationSamplesEndParameter.ActualValue; }
    }
    /// <summary>Value of <see cref="RelativeNumberOfEvaluatedSamplesParameter"/>.</summary>
    public PercentValue RelativeNumberOfEvaluatedSamples {
      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    }

    /// <summary>Actual value of <see cref="UpperEstimationLimitParameter"/>; <see cref="Apply"/> handles a null value.</summary>
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="LowerEstimationLimitParameter"/>; <see cref="Apply"/> handles a null value.</summary>
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    #endregion

    /// <summary>Constructor used by the persistence framework.</summary>
    [StorableConstructor]
    protected SymbolicRegressionValidationAnalyzer(bool deserializing) : base(deserializing) { }
    /// <summary>Cloning constructor; all work is delegated to the base class.</summary>
    protected SymbolicRegressionValidationAnalyzer(SymbolicRegressionValidationAnalyzer original, Cloner cloner) : base(original, cloner) { }
    /// <summary>Creates the analyzer and registers all of its parameters.</summary>
    public SymbolicRegressionValidationAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
      Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
    }

    /// <summary>Post-deserialization hook; currently performs no work.</summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() { }

    /// <summary>
    /// Evaluates all symbolic expression trees on a random subset of the validation partition and
    /// hands the trees together with their validation qualities to <see cref="Analyze"/>.
    /// </summary>
    /// <remarks>
    /// Rows that fall into the test partition of the problem data
    /// (<c>TestSamplesStart &lt;= row &lt; TestSamplesEnd</c>) are removed from the sampled subset.
    /// Missing estimation limits default to negative/positive infinity.
    /// </remarks>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      SymbolicExpressionTree[] trees = SymbolicExpressionTree.ToArray();

      // Resolve the problem data once; the test partition bounds are needed for every sampled row
      // and must not be looked up again inside the row filter.
      DataAnalysisProblemData problemData = ProblemData;
      string targetVariable = problemData.TargetVariable.Value;

      // select a random subset of rows in the validation set
      int validationStart = ValidiationSamplesStart.Value;
      int validationEnd = ValidationSamplesEnd.Value;
      int seed = Random.Next();
      int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
      if (count == 0) count = 1; // always request at least one row

      int testStart = problemData.TestSamplesStart.Value;
      int testEnd = problemData.TestSamplesEnd.Value;
      // Materialize the subset once so that sampling and filtering are not repeated for every tree.
      // NOTE(review): assumes SampleRandomNumbers yields the same sequence for the same seed, so that
      // every tree was (and still is) evaluated on identical rows - confirm against RandomEnumerable.
      int[] rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count)
        .Where(row => row < testStart || testEnd <= row)
        .ToArray();

      // Each limit is looked up only once; a missing limit means "unbounded" in that direction.
      DoubleValue upperLimit = UpperEstimationLimit;
      DoubleValue lowerLimit = LowerEstimationLimit;
      double upperEstimationLimit = upperLimit != null ? upperLimit.Value : double.PositiveInfinity;
      double lowerEstimationLimit = lowerLimit != null ? lowerLimit.Value : double.NegativeInfinity;

      // Evaluator, interpreter and dataset do not change while the trees are evaluated.
      ISymbolicRegressionEvaluator evaluator = Evaluator;
      ISymbolicExpressionTreeInterpreter interpreter = SymbolicExpressionTreeInterpreter;
      var dataset = problemData.Dataset;

      double[] validationQuality = new double[trees.Length];
      for (int i = 0; i < validationQuality.Length; i++) {
        validationQuality[i] = evaluator.Evaluate(interpreter, trees[i],
            lowerEstimationLimit, upperEstimationLimit,
            dataset, targetVariable,
            rows);
      }

      Analyze(trees, validationQuality);
      return base.Apply();
    }

    /// <summary>
    /// Analyzes the given trees based on their validation qualities.
    /// </summary>
    /// <param name="trees">The symbolic expression trees that were evaluated.</param>
    /// <param name="validationQuality">
    /// The validation quality of each tree; <c>validationQuality[i]</c> belongs to <c>trees[i]</c>.
    /// </param>
    protected abstract void Analyze(SymbolicExpressionTree[] trees, double[] validationQuality);
  }
}
Note: See TracBrowser for help on using the repository browser.