Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionEvaluator.cs @ 4038

Last change on this file since 4038 was 4038, checked in by mkommend, 14 years ago

corrected bug in SymbolicRegressionEvaluator (ticket #1082)

File size: 8.6 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Drawing;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Operators;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Random;

namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Abstract operator that evaluates a symbolic regression solution on a (possibly randomly
  /// sub-sampled) range of dataset rows and stores the resulting quality value.
  /// Concrete subclasses supply the actual quality measure by implementing <see cref="Evaluate"/>.
  /// </summary>
  [Item("SymbolicRegressionEvaluator", "Evaluates a symbolic regression solution.")]
  [StorableClass]
  public abstract class SymbolicRegressionEvaluator : SingleSuccessorOperator, ISymbolicRegressionEvaluator {
    private const string RandomParameterName = "Random";
    private const string QualityParameterName = "Quality";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string FunctionTreeParameterName = "FunctionTree";
    private const string RegressionProblemDataParameterName = "RegressionProblemData";
    private const string SamplesStartParameterName = "SamplesStart";
    private const string SamplesEndParameterName = "SamplesEnd";
    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";

    // Descriptions of the parameters that are registered both by the default constructor and by the
    // after-deserialization hook; kept in one place so the two registrations cannot drift apart.
    private const string RandomParameterDescription = "The random generator to use.";
    private const string RelativeNumberOfEvaluatedSamplesParameterDescription = "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.";

    #region ISymbolicRegressionEvaluator Members

    /// <summary>Lookup parameter that receives the computed quality value.</summary>
    public ILookupParameter<DoubleValue> QualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }

    /// <summary>Lookup parameter for the interpreter that is handed to <see cref="Evaluate"/>.</summary>
    public ILookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }

    /// <summary>Lookup parameter for the symbolic expression tree to evaluate (registered under the name "FunctionTree").</summary>
    public ILookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<SymbolicExpressionTree>)Parameters[FunctionTreeParameterName]; }
    }

    /// <summary>Lookup parameter for the problem data that provides the dataset and the target variable.</summary>
    public ILookupParameter<DataAnalysisProblemData> RegressionProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[RegressionProblemDataParameterName]; }
    }

    /// <summary>Value-lookup parameter for the first row index of the evaluated partition.</summary>
    public IValueLookupParameter<IntValue> SamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesStartParameterName]; }
    }

    /// <summary>Value-lookup parameter for the end row index of the evaluated partition (used as an exclusive bound by the row generator).</summary>
    public IValueLookupParameter<IntValue> SamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
    }

    /// <summary>Value parameter holding the fraction of rows of the partition that is randomly chosen for evaluation.</summary>
    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    }

    /// <summary>Lookup parameter for the random number generator from which the row-sampling seed is drawn.</summary>
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }

    #endregion

    #region properties

    /// <summary>Actual value of <see cref="RandomParameter"/>.</summary>
    public IRandom Random {
      get { return RandomParameter.ActualValue; }
    }

    /// <summary>Actual value of <see cref="SymbolicExpressionTreeInterpreterParameter"/>.</summary>
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }

    /// <summary>Actual value of <see cref="SymbolicExpressionTreeParameter"/>.</summary>
    public SymbolicExpressionTree SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }

    /// <summary>Actual value of <see cref="RegressionProblemDataParameter"/>.</summary>
    public DataAnalysisProblemData RegressionProblemData {
      get { return RegressionProblemDataParameter.ActualValue; }
    }

    /// <summary>Actual value of <see cref="SamplesStartParameter"/>.</summary>
    public IntValue SamplesStart {
      get { return SamplesStartParameter.ActualValue; }
    }

    /// <summary>Actual value of <see cref="SamplesEndParameter"/>.</summary>
    public IntValue SamplesEnd {
      get { return SamplesEndParameter.ActualValue; }
    }

    /// <summary>Value stored in <see cref="RelativeNumberOfEvaluatedSamplesParameter"/>.</summary>
    public PercentValue RelativeNumberOfEvaluatedSamples {
      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    }

    #endregion

    /// <summary>
    /// Creates the evaluator and registers all of its parameters. The relative number of evaluated
    /// samples is initialized with <c>new PercentValue(1)</c>.
    /// </summary>
    public SymbolicRegressionEvaluator()
      : base() {
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, RandomParameterDescription));
      Parameters.Add(new LookupParameter<DoubleValue>(QualityParameterName, "The quality of the evaluated symbolic regression solution."));
      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of the symbolic expression tree."));
      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(FunctionTreeParameterName, "The symbolic regression solution encoded as a symbolic expression tree."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(RegressionProblemDataParameterName, "The problem data on which the symbolic regression solution should be evaluated."));
      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName, "The start index of the dataset partition on which the symbolic regression solution should be evaluated."));
      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The end index of the dataset partition on which the symbolic regression solution should be evaluated."));
      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, RelativeNumberOfEvaluatedSamplesParameterDescription, new PercentValue(1)));
    }

    /// <summary>Constructor used by the persistence framework during deserialization.</summary>
    [StorableConstructor]
    protected SymbolicRegressionEvaluator(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Adds the "RelativeNumberOfEvaluatedSamples" and "Random" parameters after deserialization
    /// when the restored parameter collection does not contain them.
    /// NOTE(review): presumably this keeps files saved before these parameters existed loadable - confirm.
    /// </summary>
    [StorableHook(Persistence.Default.CompositeSerializers.Storable.HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(RelativeNumberOfEvaluatedSamplesParameterName))
        Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, RelativeNumberOfEvaluatedSamplesParameterDescription, new PercentValue(1)));
      if (!Parameters.ContainsKey(RandomParameterName))
        Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, RandomParameterDescription));
    }

    /// <summary>
    /// Draws a seed from <see cref="Random"/>, selects the rows to evaluate, computes the quality of
    /// the current tree via <see cref="Evaluate"/> and stores it in <see cref="QualityParameter"/>.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      // Only the seed is taken from the shared generator; the row sequence itself is produced by a
      // private generator created from that seed (see GenerateRowsToEvaluate).
      uint seed = (uint)Random.Next();
      IEnumerable<int> rows = GenerateRowsToEvaluate(seed, RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value);
      double quality = Evaluate(SymbolicExpressionTreeInterpreter, SymbolicExpressionTree, RegressionProblemData.Dataset,
        RegressionProblemData.TargetVariable, rows);
      QualityParameter.ActualValue = new DoubleValue(quality);
      return base.Apply();
    }

    /// <summary>
    /// Lazily yields a random selection of row indices from the range [start, end) in ascending order.
    /// The number of rows aimed for is <c>(int)((end - start) * relativeAmount)</c>, but at least one.
    /// </summary>
    /// <remarks>
    /// Algorithm taken from Programming Pearls, page 127.
    /// IMPORTANT: the rows are produced with yield, so the returned sequence can be enumerated more
    /// than once. The random generator is created from <paramref name="seed"/> inside the iterator,
    /// therefore every enumeration returns the same sequence of rows without caching the values.
    /// Because this is an iterator, the argument check also runs only when enumeration starts.
    /// </remarks>
    /// <param name="seed">Seed for the generator that decides which rows are selected.</param>
    /// <param name="relativeAmount">Fraction of the rows in the range that should be selected.</param>
    /// <param name="start">First candidate row index (inclusive).</param>
    /// <param name="end">End of the candidate range (exclusive).</param>
    /// <exception cref="ArgumentException">Thrown when <paramref name="end"/> is smaller than <paramref name="start"/>.</exception>
    private static IEnumerable<int> GenerateRowsToEvaluate(uint seed, double relativeAmount, int start, int end) {
      if (end < start) throw new ArgumentException("Start value is larger than end value.");
      int count = (int)((end - start) * relativeAmount);
      if (count == 0) count = 1;

      int remaining = end - start;
      MersenneTwister random = new MersenneTwister(seed);
      for (int i = start; i < end && count > 0; i++) {
        // Select row i with probability (rows still needed) / (candidate rows still left).
        double probability = random.NextDouble();
        if (probability < ((double)count) / remaining) {
          count--;
          yield return i;
        }
        remaining--;
      }
    }

    /// <summary>
    /// Calculates the quality of <paramref name="solution"/> on the given rows of <paramref name="dataset"/>.
    /// </summary>
    /// <param name="interpreter">Interpreter passed through from <see cref="SymbolicExpressionTreeInterpreter"/>.</param>
    /// <param name="solution">The symbolic expression tree to evaluate.</param>
    /// <param name="dataset">The dataset of the regression problem data.</param>
    /// <param name="targetVariable">The target variable of the regression problem data.</param>
    /// <param name="rows">The row indices on which the solution is evaluated; the sequence is lazy and repeatable.</param>
    /// <returns>The quality value that <see cref="Apply"/> stores in <see cref="QualityParameter"/>.</returns>
    protected abstract double Evaluate(ISymbolicExpressionTreeInterpreter interpreter,
      SymbolicExpressionTree solution,
      Dataset dataset,
      StringValue targetVariable,
      IEnumerable<int> rows);
  }
}
Note: See TracBrowser for help on using the repository browser.