#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using System.Drawing;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Operators;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Random;

namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Abstract base operator that evaluates a symbolic regression solution.
  /// It picks the rows of the dataset partition to evaluate, delegates the actual quality
  /// calculation to <c>Evaluate</c> and stores the result in the quality parameter.
  /// </summary>
  [Item("SymbolicRegressionEvaluator", "Evaluates a symbolic regression solution.")]
  [StorableClass]
  public abstract class SymbolicRegressionEvaluator : SingleSuccessorOperator, ISymbolicRegressionEvaluator {
    private const string RandomParameterName = "Random";
    private const string QualityParameterName = "Quality";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string FunctionTreeParameterName = "FunctionTree";
    private const string RegressionProblemDataParameterName = "RegressionProblemData";
    private const string SamplesStartParameterName = "SamplesStart";
    private const string SamplesEndParameterName = "SamplesEnd";
    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";

    #region ISymbolicRegressionEvaluator Members
    // Typed accessors for the entries of the Parameters collection. The type argument of each
    // accessor matches the type of the parameter registered under the same name in the constructor.
    public ILookupParameter<DoubleValue> QualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    }
    public ILookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    // NOTE: this parameter is registered under the name "FunctionTree", not "SymbolicExpressionTree".
    public ILookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
      get { return (ILookupParameter<SymbolicExpressionTree>)Parameters[FunctionTreeParameterName]; }
    }
    public ILookupParameter<DataAnalysisProblemData> RegressionProblemDataParameter {
      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[RegressionProblemDataParameterName]; }
    }
    public IValueLookupParameter<IntValue> SamplesStartParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesStartParameterName]; }
    }
    public IValueLookupParameter<IntValue> SamplesEndParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
    }
    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    }
    public ILookupParameter<IRandom> RandomParameter {
      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    }
    #endregion

    #region properties
    // Convenience shortcuts that return the current value of the corresponding parameter.
    public IRandom Random {
      get { return RandomParameter.ActualValue; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    public SymbolicExpressionTree SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    public DataAnalysisProblemData RegressionProblemData {
      get { return RegressionProblemDataParameter.ActualValue; }
    }
    public IntValue SamplesStart {
      get { return SamplesStartParameter.ActualValue; }
    }
    public IntValue SamplesEnd {
      get { return SamplesEndParameter.ActualValue; }
    }
    public PercentValue RelativeNumberOfEvaluatedSamples {
      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    }
    #endregion

    /// <summary>
    /// Creates the evaluator and registers all of its parameters. The relative number of
    /// evaluated samples defaults to <c>new PercentValue(1)</c>.
    /// </summary>
    public SymbolicRegressionEvaluator()
      : base() {
      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
      Parameters.Add(new LookupParameter<DoubleValue>(QualityParameterName, "The quality of the evaluated symbolic regression solution."));
      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used to calculate the output values of the symbolic expression tree."));
      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(FunctionTreeParameterName, "The symbolic regression solution encoded as a symbolic expression tree."));
      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(RegressionProblemDataParameterName, "The problem data on which the symbolic regression solution should be evaluated."));
      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName, "The start index of the dataset partition on which the symbolic regression solution should be evaluated."));
      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The end index of the dataset partition on which the symbolic regression solution should be evaluated."));
      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
    }

    /// <summary>
    /// Constructor used by the persistence framework; it only forwards to the base class.
    /// </summary>
    [StorableConstructor]
    protected SymbolicRegressionEvaluator(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Adds the "RelativeNumberOfEvaluatedSamples" and "Random" parameters after deserialization
    /// if the restored parameter collection does not contain them.
    /// </summary>
    [StorableHook(Persistence.Default.CompositeSerializers.Storable.HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(RelativeNumberOfEvaluatedSamplesParameterName))
        Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
      if (!Parameters.ContainsKey(RandomParameterName))
        Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
    }

    /// <summary>
    /// Draws a seed from <c>Random</c>, selects the rows to evaluate, calls <c>Evaluate</c> on them
    /// and stores the returned quality as a new <c>DoubleValue</c> in the quality parameter.
    /// </summary>
    /// <returns>The operation returned by <c>base.Apply()</c>.</returns>
    public override IOperation Apply() {
      uint seed = (uint)Random.Next();
      IEnumerable<int> rows = GenerateRowsToEvaluate(seed, RelativeNumberOfEvaluatedSamples.Value, SamplesStart.Value, SamplesEnd.Value);
      double quality = Evaluate(SymbolicExpressionTreeInterpreter, SymbolicExpressionTree, RegressionProblemData.Dataset, RegressionProblemData.TargetVariable, rows);
      QualityParameter.ActualValue = new DoubleValue(quality);
      return base.Apply();
    }

    /// <summary>
    /// Validates the range and returns a lazily evaluated sequence of row indices in
    /// [<paramref name="start"/>, <paramref name="end"/>) that should be evaluated.
    /// </summary>
    /// <param name="seed">Seed of the random number generator that drives the selection.</param>
    /// <param name="relativeAmount">Fraction of the rows in the range that should be selected.</param>
    /// <param name="start">First row index of the range (inclusive).</param>
    /// <param name="end">End row index of the range (exclusive).</param>
    /// <returns>The selected row indices in ascending order.</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="end"/> is smaller than <paramref name="start"/>.</exception>
    private static IEnumerable<int> GenerateRowsToEvaluate(uint seed, double relativeAmount, int start, int end) {
      // The check lives in this non-iterator wrapper so that it runs immediately on the call.
      // Inside an iterator method it would be deferred until the sequence is first enumerated.
      if (end < start) throw new ArgumentException("Start value is larger than end value.");
      return SelectRows(seed, relativeAmount, start, end);
    }

    // Algorithm taken from Programming Pearls, page 127.
    // IMPORTANT: because an IEnumerable with yield is used, the seed must be specified so that every
    // enumeration returns the same sequence of numbers without caching the values. For that reason
    // the random number generator is created inside the iterator body (once per enumeration).
    private static IEnumerable<int> SelectRows(uint seed, double relativeAmount, int start, int end) {
      int count = (int)((end - start) * relativeAmount);
      // Request at least one row when the computed amount truncates to zero.
      // (For an empty range the loop below still yields nothing.)
      if (count == 0) count = 1;

      int remaining = end - start;
      MersenneTwister random = new MersenneTwister(seed);
      for (int i = start; i < end && count > 0; i++) {
        double probability = random.NextDouble();
        // Row i is selected with probability (rows still needed) / (rows still available).
        if (probability < ((double)count) / remaining) {
          count--;
          yield return i;
        }
        remaining--;
      }
    }

    /// <summary>
    /// Calculates the quality of <paramref name="solution"/> on the given rows of the dataset.
    /// Implemented by the concrete evaluators.
    /// </summary>
    /// <param name="interpreter">The interpreter passed on from the interpreter parameter.</param>
    /// <param name="solution">The symbolic expression tree to evaluate.</param>
    /// <param name="dataset">The dataset of the regression problem data.</param>
    /// <param name="targetVariable">The target variable of the regression problem data.</param>
    /// <param name="rows">The row indices on which the solution should be evaluated.</param>
    /// <returns>The quality value that <c>Apply</c> stores in the quality parameter.</returns>
    protected abstract double Evaluate(ISymbolicExpressionTreeInterpreter interpreter,
      SymbolicExpressionTree solution,
      Dataset dataset,
      StringValue targetVariable,
      IEnumerable<int> rows);
  }
}