#region License Information
/* HeuristicLab
* Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using System.Drawing;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Operators;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Random;
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
[Item("SymbolicRegressionEvaluator", "Evaluates a symbolic regression solution.")]
[StorableClass]
public abstract class SymbolicRegressionEvaluator : SingleSuccessorOperator, ISymbolicRegressionEvaluator {
// Keys under which this operator's parameters are registered in, and later fetched from,
// the Parameters collection.
private const string RandomParameterName = "Random";
private const string QualityParameterName = "Quality";
private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
// NOTE: the tree parameter is registered as "FunctionTree" although its accessor is named SymbolicExpressionTreeParameter.
private const string FunctionTreeParameterName = "FunctionTree";
private const string RegressionProblemDataParameterName = "RegressionProblemData";
private const string SamplesStartParameterName = "SamplesStart";
private const string SamplesEndParameterName = "SamplesEnd";
private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
#region ISymbolicRegressionEvaluator Members
/// <summary>Parameter registered as "Quality"; Apply stores the computed quality in it.</summary>
public ILookupParameter QualityParameter {
  get {
    return (ILookupParameter)Parameters[QualityParameterName];
  }
}

/// <summary>Parameter registered as "SymbolicExpressionTreeInterpreter".</summary>
public ILookupParameter SymbolicExpressionTreeInterpreterParameter {
  get {
    return (ILookupParameter)Parameters[SymbolicExpressionTreeInterpreterParameterName];
  }
}

/// <summary>Parameter for the tree to evaluate; note that it is registered as "FunctionTree".</summary>
public ILookupParameter SymbolicExpressionTreeParameter {
  get {
    return (ILookupParameter)Parameters[FunctionTreeParameterName];
  }
}

/// <summary>Parameter registered as "RegressionProblemData".</summary>
public ILookupParameter RegressionProblemDataParameter {
  get {
    return (ILookupParameter)Parameters[RegressionProblemDataParameterName];
  }
}

/// <summary>Parameter registered as "SamplesStart" (start index of the evaluated partition).</summary>
public IValueLookupParameter SamplesStartParameter {
  get {
    return (IValueLookupParameter)Parameters[SamplesStartParameterName];
  }
}

/// <summary>Parameter registered as "SamplesEnd" (end index of the evaluated partition).</summary>
public IValueLookupParameter SamplesEndParameter {
  get {
    return (IValueLookupParameter)Parameters[SamplesEndParameterName];
  }
}

/// <summary>Parameter registered as "RelativeNumberOfEvaluatedSamples".</summary>
public IValueParameter RelativeNumberOfEvaluatedSamplesParameter {
  get {
    return (IValueParameter)Parameters[RelativeNumberOfEvaluatedSamplesParameterName];
  }
}

/// <summary>Parameter registered as "Random" (the random generator to use).</summary>
public ILookupParameter RandomParameter {
  get {
    return (ILookupParameter)Parameters[RandomParameterName];
  }
}
#endregion
#region properties
/// <summary>Actual value of <see cref="RandomParameter"/>.</summary>
public IRandom Random {
  get {
    return RandomParameter.ActualValue;
  }
}

/// <summary>Actual value of <see cref="SymbolicExpressionTreeInterpreterParameter"/>.</summary>
public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
  get {
    return SymbolicExpressionTreeInterpreterParameter.ActualValue;
  }
}

/// <summary>Actual value of <see cref="SymbolicExpressionTreeParameter"/>.</summary>
public SymbolicExpressionTree SymbolicExpressionTree {
  get {
    return SymbolicExpressionTreeParameter.ActualValue;
  }
}

/// <summary>Actual value of <see cref="RegressionProblemDataParameter"/>.</summary>
public DataAnalysisProblemData RegressionProblemData {
  get {
    return RegressionProblemDataParameter.ActualValue;
  }
}

/// <summary>Actual value of <see cref="SamplesStartParameter"/>.</summary>
public IntValue SamplesStart {
  get {
    return SamplesStartParameter.ActualValue;
  }
}

/// <summary>Actual value of <see cref="SamplesEndParameter"/>.</summary>
public IntValue SamplesEnd {
  get {
    return SamplesEndParameter.ActualValue;
  }
}

/// <summary>
/// Value of <see cref="RelativeNumberOfEvaluatedSamplesParameter"/>; unlike the other
/// properties this one reads Value rather than ActualValue.
/// </summary>
public PercentValue RelativeNumberOfEvaluatedSamples {
  get {
    return RelativeNumberOfEvaluatedSamplesParameter.Value;
  }
}
#endregion
/// <summary>
/// Creates the evaluator and registers its eight parameters. The relative number of
/// evaluated samples is initialised with <c>new PercentValue(1)</c>.
/// </summary>
public SymbolicRegressionEvaluator() {
  Parameters.Add(new LookupParameter(
    RandomParameterName,
    "The random generator to use."));
  Parameters.Add(new LookupParameter(
    QualityParameterName,
    "The quality of the evaluated symbolic regression solution."));
  Parameters.Add(new LookupParameter(
    SymbolicExpressionTreeInterpreterParameterName,
    "The interpreter that should be used to calculate the output values of the symbolic expression tree."));
  Parameters.Add(new LookupParameter(
    FunctionTreeParameterName,
    "The symbolic regression solution encoded as a symbolic expression tree."));
  Parameters.Add(new LookupParameter(
    RegressionProblemDataParameterName,
    "The problem data on which the symbolic regression solution should be evaluated."));
  Parameters.Add(new ValueLookupParameter(
    SamplesStartParameterName,
    "The start index of the dataset partition on which the symbolic regression solution should be evaluated."));
  Parameters.Add(new ValueLookupParameter(
    SamplesEndParameterName,
    "The end index of the dataset partition on which the symbolic regression solution should be evaluated."));
  Parameters.Add(new ValueParameter(
    RelativeNumberOfEvaluatedSamplesParameterName,
    "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.",
    new PercentValue(1)));
}

/// <summary>
/// Constructor marked with [StorableConstructor]; it only forwards the flag to the base class
/// and registers no parameters itself.
/// </summary>
[StorableConstructor]
protected SymbolicRegressionEvaluator(bool deserializing)
  : base(deserializing) {
}
/// <summary>
/// After-deserialization hook: adds the "RelativeNumberOfEvaluatedSamples" and "Random"
/// parameters when the restored parameter collection does not contain them.
/// NOTE(review): presumably needed for instances persisted before these parameters existed - confirm.
/// </summary>
[StorableHook(Persistence.Default.CompositeSerializers.Storable.HookType.AfterDeserialization)]
private void AfterDeserialization() {
  bool hasRelativeSamples = Parameters.ContainsKey(RelativeNumberOfEvaluatedSamplesParameterName);
  if (!hasRelativeSamples) {
    Parameters.Add(new ValueParameter(
      RelativeNumberOfEvaluatedSamplesParameterName,
      "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.",
      new PercentValue(1)));
  }

  bool hasRandom = Parameters.ContainsKey(RandomParameterName);
  if (!hasRandom) {
    Parameters.Add(new LookupParameter(
      RandomParameterName,
      "The random generator to use."));
  }
}
/// <summary>
/// Evaluates the current symbolic expression tree on rows chosen between SamplesStart and
/// SamplesEnd and stores the result as a new DoubleValue in <see cref="QualityParameter"/>.
/// </summary>
/// <returns>The operation returned by the base class implementation.</returns>
public override IOperation Apply() {
  // Draw a seed from the operator's random generator and hand it to the row generator.
  uint rowSeed = (uint)Random.Next();
  double sampleFraction = RelativeNumberOfEvaluatedSamples.Value;
  int firstRow = SamplesStart.Value;
  int endRow = SamplesEnd.Value;
  IEnumerable selectedRows = GenerateRowsToEvaluate(rowSeed, sampleFraction, firstRow, endRow);

  double computedQuality = Evaluate(
    SymbolicExpressionTreeInterpreter,
    SymbolicExpressionTree,
    RegressionProblemData.Dataset,
    RegressionProblemData.TargetVariable,
    selectedRows);

  QualityParameter.ActualValue = new DoubleValue(computedQuality);
  return base.Apply();
}
// Sampling scheme taken from "Programming Pearls", page 127.
// IMPORTANT: the rows are produced lazily with yield, so the sequence may be enumerated several times.
// Each enumeration builds a new MersenneTwister from the given seed, which is why the seed must be
// specified: the same rows are then returned again without the values having to be cached.
// Throws ArgumentException immediately (not on first enumeration) when end < start.
private static IEnumerable GenerateRowsToEvaluate(uint seed, double relativeAmount, int start, int end) {
  // An iterator body only starts running on the first MoveNext(); validating in this
  // non-iterator wrapper reports an invalid range at the call site instead of inside the consumer.
  if (end < start) throw new ArgumentException("Start value is larger than end value.");
  return GenerateRowsToEvaluateIterator(seed, relativeAmount, start, end);
}

// Iterator part of GenerateRowsToEvaluate: lazily yields, in ascending order, the sampled row
// indices from [start, end). Expects start <= end (checked by the caller above).
private static IEnumerable GenerateRowsToEvaluateIterator(uint seed, double relativeAmount, int start, int end) {
  int remaining = end - start;                    // rows not yet visited
  int count = (int)(remaining * relativeAmount);  // rows still to be picked
  if (count == 0) count = 1;                      // try to pick at least one row
  MersenneTwister random = new MersenneTwister(seed);
  for (int i = start; i < end && count > 0; i++) {
    // Pick row i with probability (rows still to be picked) / (rows not yet visited).
    double probability = random.NextDouble();
    if (probability < ((double)count) / remaining) {
      count--;
      yield return i;
    }
    remaining--;
  }
}
/// <summary>
/// Calculates the quality of a symbolic regression solution. Called by <see cref="Apply"/>,
/// which wraps the returned number in a DoubleValue and stores it in the quality parameter.
/// </summary>
/// <param name="interpreter">The interpreter taken from the SymbolicExpressionTreeInterpreter parameter.</param>
/// <param name="solution">The symbolic expression tree to evaluate.</param>
/// <param name="dataset">The dataset of the regression problem data.</param>
/// <param name="targetVariable">The target variable of the regression problem data.</param>
/// <param name="rows">The row indices produced by GenerateRowsToEvaluate for this evaluation.</param>
/// <returns>The quality value; its scale and direction are defined by the concrete subclass.</returns>
protected abstract double Evaluate(ISymbolicExpressionTreeInterpreter interpreter,
SymbolicExpressionTree solution,
Dataset dataset,
StringValue targetVariable,
IEnumerable rows);
}
}