Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionEvaluator.cs @ 4038

Last change on this file since 4038 was 4038, checked in by mkommend, 14 years ago

corrected bug in SymbolicRegressionEvaluator (ticket #1082)

File size: 8.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Drawing;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.PluginInfrastructure;
33using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
34using HeuristicLab.Problems.DataAnalysis;
35using HeuristicLab.Operators;
36using HeuristicLab.Problems.DataAnalysis.Symbolic;
37using HeuristicLab.Random;
38
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Base class for operators that evaluate a symbolic regression solution.
  /// <see cref="Apply"/> selects a random subset of rows from the configured dataset partition,
  /// passes them to <see cref="Evaluate"/> and stores the returned value in the quality parameter.
  /// </summary>
  [Item("SymbolicRegressionEvaluator", "Evaluates a symbolic regression solution.")]
  [StorableClass]
  public abstract class SymbolicRegressionEvaluator : SingleSuccessorOperator, ISymbolicRegressionEvaluator {
    private const string RandomParameterName = "Random";
    private const string QualityParameterName = "Quality";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string FunctionTreeParameterName = "FunctionTree";
    private const string RegressionProblemDataParameterName = "RegressionProblemData";
    private const string SamplesStartParameterName = "SamplesStart";
    private const string SamplesEndParameterName = "SamplesEnd";
    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";

    #region ISymbolicRegressionEvaluator Members

    /// <summary>Parameter that receives the quality computed by <see cref="Apply"/>.</summary>
    public ILookupParameter<DoubleValue> QualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }

    /// <summary>Parameter holding the interpreter that is handed to <see cref="Evaluate"/>.</summary>
    public ILookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }

    /// <summary>Parameter holding the symbolic expression tree to evaluate (registered under the name "FunctionTree").</summary>
    public ILookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<SymbolicExpressionTree>)Parameters[FunctionTreeParameterName]; }
    }

    /// <summary>Parameter holding the problem data whose dataset and target variable are evaluated.</summary>
    public ILookupParameter<DataAnalysisProblemData> RegressionProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[RegressionProblemDataParameterName]; }
    }

    /// <summary>Parameter holding the first row index (inclusive) of the evaluated partition.</summary>
    public IValueLookupParameter<IntValue> SamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesStartParameterName]; }
    }

    /// <summary>Parameter holding the end row index (exclusive) of the evaluated partition.</summary>
    public IValueLookupParameter<IntValue> SamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
    }

    /// <summary>Parameter holding the fraction of the partition's rows that is sampled for evaluation.</summary>
    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    }

    /// <summary>Parameter holding the random number generator used to seed the row sampling.</summary>
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }

    #endregion

    #region properties
    // Convenience accessors for the current values of the parameters above.
    public IRandom Random {
      get { return RandomParameter.ActualValue; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public SymbolicExpressionTree SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    public DataAnalysisProblemData RegressionProblemData {
      get { return RegressionProblemDataParameter.ActualValue; }
    }
    public IntValue SamplesStart {
      get { return SamplesStartParameter.ActualValue; }
    }
    public IntValue SamplesEnd {
      get { return SamplesEndParameter.ActualValue; }
    }

    public PercentValue RelativeNumberOfEvaluatedSamples {
      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    }
    #endregion

    /// <summary>
    /// Creates the evaluator and registers all of its parameters.
    /// </summary>
    public SymbolicRegressionEvaluator()
      : base() {
      Parameters.Add(CreateRandomParameter());
      Parameters.Add(new LookupParameter<DoubleValue>(QualityParameterName, "The quality of the evaluated symbolic regression solution."));
      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of the symbolic expression tree."));
      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(FunctionTreeParameterName, "The symbolic regression solution encoded as a symbolic expression tree."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(RegressionProblemDataParameterName, "The problem data on which the symbolic regression solution should be evaluated."));
      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName, "The start index of the dataset partition on which the symbolic regression solution should be evaluated."));
      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The end index of the dataset partition on which the symbolic regression solution should be evaluated."));
      Parameters.Add(CreateRelativeNumberOfEvaluatedSamplesParameter());
    }

    [StorableConstructor]
    protected SymbolicRegressionEvaluator(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Adds the "RelativeNumberOfEvaluatedSamples" and "Random" parameters to deserialized
    /// instances that do not contain them yet.
    /// </summary>
    [StorableHook(Persistence.Default.CompositeSerializers.Storable.HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(RelativeNumberOfEvaluatedSamplesParameterName))
        Parameters.Add(CreateRelativeNumberOfEvaluatedSamplesParameter());
      if (!Parameters.ContainsKey(RandomParameterName))
        Parameters.Add(CreateRandomParameter());
    }

    // The two factories below are shared by the constructor and AfterDeserialization so that
    // both code paths always register identical parameter definitions.

    /// <summary>Creates the lookup parameter for the random number generator.</summary>
    private static LookupParameter<IRandom> CreateRandomParameter() {
      return new LookupParameter<IRandom>(RandomParameterName, "The random generator to use.");
    }

    /// <summary>Creates the sampling-ratio parameter with its default value of <c>new PercentValue(1)</c>.</summary>
    private static ValueParameter<PercentValue> CreateRelativeNumberOfEvaluatedSamplesParameter() {
      return new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1));
    }

    /// <summary>
    /// Draws a seed from <see cref="Random"/>, samples the rows to evaluate, calls
    /// <see cref="Evaluate"/> and writes the result to <see cref="QualityParameter"/>.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // The seed is drawn once here; the row sequence itself is produced lazily from it.
      uint seed = (uint)Random.Next();
      IEnumerable<int> rows = GenerateRowsToEvaluate(seed, RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value);
      double quality = Evaluate(SymbolicExpressionTreeInterpreter, SymbolicExpressionTree, RegressionProblemData.Dataset,
        RegressionProblemData.TargetVariable, rows);
      QualityParameter.ActualValue = new DoubleValue(quality);
      return base.Apply();
    }

    /// <summary>
    /// Returns a lazily generated, ascending sequence of randomly chosen row indices from
    /// the range [<paramref name="start"/>, <paramref name="end"/>).
    /// The number of rows aimed for is (end - start) * relativeAmount, truncated to an integer,
    /// but at least 1. An empty range (start equal to end) yields no rows.
    /// </summary>
    /// <param name="seed">Seed of the random number generator created for every enumeration.</param>
    /// <param name="relativeAmount">Fraction of the rows in the range that should be selected.</param>
    /// <param name="start">First candidate row index (inclusive).</param>
    /// <param name="end">End of the candidate row indices (exclusive).</param>
    /// <exception cref="ArgumentException">Thrown immediately when <paramref name="end"/> is smaller than <paramref name="start"/>.</exception>
    private static IEnumerable<int> GenerateRowsToEvaluate(uint seed, double relativeAmount, int start, int end) {
      // The check lives in this non-iterator wrapper on purpose: inside a method that uses
      // yield it would not run until the sequence is enumerated for the first time.
      if (end < start) throw new ArgumentException("Start value is larger than end value.");
      return SampleRows(seed, relativeAmount, start, end);
    }

    // Algorithm taken from Programming Pearls, page 127.
    // IMPORTANT: because an IEnumerable with yield is used, the seed must be specified so that
    // every enumeration returns the same sequence of numbers without caching the values.
    // The random number generator is therefore created inside the iterator body.
    private static IEnumerable<int> SampleRows(uint seed, double relativeAmount, int start, int end) {
      int count = (int)((end - start) * relativeAmount);
      if (count == 0) count = 1;

      int remaining = end - start;
      MersenneTwister random = new MersenneTwister(seed);
      for (int i = start; i < end && count > 0; i++) {
        // Row i is selected with probability (rows still to select) / (rows still to visit).
        double probability = random.NextDouble();
        if (probability < ((double)count) / remaining) {
          count--;
          yield return i;
        }
        remaining--;
      }
    }

    /// <summary>
    /// Computes the quality of <paramref name="solution"/>; the returned value is stored
    /// in <see cref="QualityParameter"/> by <see cref="Apply"/>.
    /// </summary>
    /// <param name="interpreter">The interpreter taken from <see cref="SymbolicExpressionTreeInterpreterParameter"/>.</param>
    /// <param name="solution">The symbolic expression tree to evaluate.</param>
    /// <param name="dataset">The dataset of the regression problem data.</param>
    /// <param name="targetVariable">The target variable of the regression problem data.</param>
    /// <param name="rows">The sampled row indices; the sequence is generated lazily and yields the same rows on every enumeration.</param>
    /// <returns>The quality value of the solution.</returns>
    protected abstract double Evaluate(ISymbolicExpressionTreeInterpreter interpreter,
      SymbolicExpressionTree solution,
      Dataset dataset,
      StringValue targetVariable,
      IEnumerable<int> rows);
  }
}
Note: See TracBrowser for help on using the repository browser.