Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionValidationAnalyzer.cs @ 7624

Last change on this file since 7624 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 9.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// A base class for operators that analyze the validation fitness of symbolic regression models.
  /// </summary>
  /// <remarks>
  /// <see cref="Apply"/> evaluates every symbolic expression tree on a randomly sampled subset of the
  /// validation partition and passes the trees together with their validation qualities to
  /// <see cref="Analyze"/>, which derived classes implement.
  /// </remarks>
  [Item("SymbolicRegressionValidationAnalyzer", "A base class for operators that analyze the validation fitness of symbolic regression models.")]
  [StorableClass]
  public abstract class SymbolicRegressionValidationAnalyzer : SingleSuccessorOperator {
    private const string RandomParameterName = "Random";
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string ProblemDataParameterName = "ProblemData";
    private const string ValidationSamplesStartParameterName = "SamplesStart";
    private const string ValidationSamplesEndParameterName = "SamplesEnd";
    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    private const string EvaluatorParameterName = "Evaluator";
    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";

    #region parameter properties
    /// <summary>Lookup parameter for the random generator to use.</summary>
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }
    /// <summary>Scope tree lookup parameter for the symbolic expression trees to analyze.</summary>
    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>Parameter for the interpreter that is used for the analysis of symbolic expression trees.</summary>
    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    /// <summary>Lookup parameter for the evaluator that evaluates the solutions on the validation set.</summary>
    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
    }
    /// <summary>Parameter for the problem data for which the symbolic expression trees are solutions.</summary>
    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    }
    /// <summary>Parameter for the first index of the validation partition of the data set.</summary>
    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    }
    /// <summary>Parameter for the last index of the validation partition of the data set.</summary>
    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    }
    /// <summary>
    /// Parameter for the relative number of samples of the validation partition that are randomly
    /// chosen for evaluation. The constructor initializes it with <c>new PercentValue(1)</c>.
    /// </summary>
    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    }

    /// <summary>Parameter for the upper estimation limit that was set for the evaluation of the trees.</summary>
    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    }
    /// <summary>Parameter for the lower estimation limit that was set for the evaluation of the trees.</summary>
    public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    }
    #endregion
    #region properties
    /// <summary>Actual value of <see cref="RandomParameter"/>.</summary>
    public IRandom Random {
      get { return RandomParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeParameter"/>.</summary>
    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeInterpreterParameter"/>.</summary>
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="EvaluatorParameter"/>.</summary>
    public ISymbolicRegressionEvaluator Evaluator {
      get { return EvaluatorParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="ProblemDataParameter"/>.</summary>
    public DataAnalysisProblemData ProblemData {
      get { return ProblemDataParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="ValidationSamplesStartParameter"/>.</summary>
    public IntValue ValidationSamplesStart {
      get { return ValidationSamplesStartParameter.ActualValue; }
    }
    /// <summary>
    /// Misspelled alias of <see cref="ValidationSamplesStart"/>; retained so that existing callers
    /// keep compiling. New code should use <see cref="ValidationSamplesStart"/>.
    /// </summary>
    public IntValue ValidiationSamplesStart {
      get { return ValidationSamplesStart; }
    }
    /// <summary>Actual value of <see cref="ValidationSamplesEndParameter"/>.</summary>
    public IntValue ValidationSamplesEnd {
      get { return ValidationSamplesEndParameter.ActualValue; }
    }
    /// <summary>Value of <see cref="RelativeNumberOfEvaluatedSamplesParameter"/>.</summary>
    public PercentValue RelativeNumberOfEvaluatedSamples {
      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    }

    /// <summary>Actual value of <see cref="UpperEstimationLimitParameter"/>; may be null.</summary>
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="LowerEstimationLimitParameter"/>; may be null.</summary>
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.ActualValue; }
    }
    #endregion

    /// <summary>Constructor used by the persistence framework.</summary>
    [StorableConstructor]
    protected SymbolicRegressionValidationAnalyzer(bool deserializing) : base(deserializing) { }
    /// <summary>Cloning constructor.</summary>
    protected SymbolicRegressionValidationAnalyzer(SymbolicRegressionValidationAnalyzer original, Cloner cloner) : base(original, cloner) { }
    /// <summary>
    /// Creates the analyzer and registers all of its parameters.
    /// </summary>
    public SymbolicRegressionValidationAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
      Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
    }

    /// <summary>Hook invoked after deserialization; currently performs no work.</summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() { }

    /// <summary>
    /// Evaluates all symbolic expression trees on a random subset of the validation partition and
    /// forwards the trees and their qualities to <see cref="Analyze"/>.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      var trees = SymbolicExpressionTree.ToArray();

      // Resolve the looked-up values once instead of once per tree / per row.
      DataAnalysisProblemData problemData = ProblemData;
      string targetVariable = problemData.TargetVariable.Value;

      // select a random subset of rows in the validation set
      int validationStart = ValidationSamplesStart.Value;
      int validationEnd = ValidationSamplesEnd.Value;
      int seed = Random.Next();
      int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
      if (count == 0) count = 1; // always request at least one row

      // Rows inside [TestSamplesStart, TestSamplesEnd) are excluded from the validation rows.
      int testStart = problemData.TestSamplesStart.Value;
      int testEnd = problemData.TestSamplesEnd.Value;

      // Materialize the sample: the query would otherwise be re-executed (sampling and filtering
      // again from the same seed) every time an evaluator enumerates it, i.e. once per tree.
      int[] rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count)
        .Where(row => row < testStart || testEnd <= row)
        .ToArray();

      // Missing estimation limits mean "unbounded".
      double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
      double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;

      ISymbolicRegressionEvaluator evaluator = Evaluator;
      ISymbolicExpressionTreeInterpreter interpreter = SymbolicExpressionTreeInterpreter;

      double[] validationQuality = new double[trees.Length];
      for (int i = 0; i < validationQuality.Length; i++) {
        validationQuality[i] = evaluator.Evaluate(interpreter, trees[i],
            lowerEstimationLimit, upperEstimationLimit,
            problemData.Dataset, targetVariable,
            rows);
      }

      Analyze(trees, validationQuality);
      return base.Apply();
    }

    /// <summary>
    /// Analyzes the given trees together with their validation qualities.
    /// </summary>
    /// <param name="trees">The symbolic expression trees that were evaluated.</param>
    /// <param name="validationQuality">
    /// The quality values returned by the evaluator; element <c>i</c> belongs to <c>trees[i]</c>.
    /// </param>
    protected abstract void Analyze(SymbolicExpressionTree[] trees, double[] validationQuality);
  }
}
Note: See TracBrowser for help on using the repository browser.