#region License Information
/* HeuristicLab
* Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HEAL.Attic;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Parameters;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.Instances;
namespace HeuristicLab.Problems.GeneticProgramming.BasicSymbolicRegression {
[Item("Koza-style Symbolic Regression", "An implementation of symbolic regression without bells-and-whistles. Use \"Symbolic Regression Problem (single-objective)\" if you want to use all features.")]
[Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
[StorableType("72011B73-28C6-4D5E-BEDF-27425BC87B9C")]
public sealed class Problem : SymbolicExpressionTreeProblem, IRegressionProblem, IProblemInstanceConsumer, IProblemInstanceExporter {
#region parameter names
private const string ProblemDataParameterName = "ProblemData";
#endregion
#region Parameter Properties
IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }
public IValueParameter ProblemDataParameter {
get { return (IValueParameter)Parameters[ProblemDataParameterName]; }
}
#endregion
#region Properties
public IRegressionProblemData ProblemData {
get { return ProblemDataParameter.Value; }
set { ProblemDataParameter.Value = value; }
}
IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }
#endregion
public event EventHandler ProblemDataChanged;
public override bool Maximization {
get { return true; }
}
#region item cloning and persistence
// persistence
[StorableConstructor]
private Problem(StorableConstructorFlag _) : base(_) { }
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
RegisterEventHandlers();
}
// cloning
private Problem(Problem original, Cloner cloner)
: base(original, cloner) {
RegisterEventHandlers();
}
public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
#endregion
public Problem()
: base() {
Parameters.Add(new ValueParameter(ProblemDataParameterName, "The data for the regression problem", new RegressionProblemData()));
var g = new SimpleSymbolicExpressionGrammar(); // empty grammar is replaced in UpdateGrammar()
base.Encoding = new SymbolicExpressionTreeEncoding(g, 100, 17);
Encoding.GrammarParameter.ReadOnly = true;
UpdateGrammar();
RegisterEventHandlers();
}
public override double Evaluate(ISymbolicExpressionTree tree, IRandom random) {
// Doesn't use classes from HeuristicLab.Problems.DataAnalysis.Symbolic to make sure that the implementation can be fully understood easily.
// HeuristicLab.Problems.DataAnalysis.Symbolic would already provide all the necessary functionality (esp. interpreter) but at a much higher complexity.
// Another argument is that we don't need a reference to HeuristicLab.Problems.DataAnalysis.Symbolic
var problemData = ProblemData;
var rows = ProblemData.TrainingIndices.ToArray();
var target = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
var predicted = Interpret(tree, problemData.Dataset, rows);
OnlineCalculatorError errorState;
var r = OnlinePearsonsRCalculator.Calculate(target, predicted, out errorState);
if (errorState != OnlineCalculatorError.None) r = 0;
return r * r;
}
private IEnumerable Interpret(ISymbolicExpressionTree tree, IDataset dataset, IEnumerable rows) {
// skip programRoot and startSymbol
return InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), dataset, rows);
}
private IEnumerable InterpretRec(ISymbolicExpressionTreeNode node, IDataset dataset, IEnumerable rows) {
Func, IEnumerable> binaryEval =
(left, right, f) => InterpretRec(left, dataset, rows).Zip(InterpretRec(right, dataset, rows), f);
switch (node.Symbol.Name) {
case "+": return binaryEval(node.GetSubtree(0), node.GetSubtree(1), (x, y) => x + y);
case "*": return binaryEval(node.GetSubtree(0), node.GetSubtree(1), (x, y) => x * y);
case "-": return binaryEval(node.GetSubtree(0), node.GetSubtree(1), (x, y) => x - y);
case "%": return binaryEval(node.GetSubtree(0), node.GetSubtree(1), (x, y) => y.IsAlmost(0.0) ? 0.0 : x / y); // protected division
default: {
double erc;
if (double.TryParse(node.Symbol.Name, out erc)) {
return rows.Select(_ => erc);
} else {
// assume that this is a variable name
return dataset.GetDoubleValues(node.Symbol.Name, rows);
}
}
}
}
#region events
private void RegisterEventHandlers() {
ProblemDataParameter.ValueChanged += new EventHandler(ProblemDataParameter_ValueChanged);
if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += new EventHandler(ProblemData_Changed);
}
private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
ProblemDataParameter.Value.Changed += new EventHandler(ProblemData_Changed);
OnProblemDataChanged();
OnReset();
}
private void ProblemData_Changed(object sender, EventArgs e) {
OnReset();
}
private void OnProblemDataChanged() {
UpdateGrammar();
var handler = ProblemDataChanged;
if (handler != null) handler(this, EventArgs.Empty);
}
private void UpdateGrammar() {
// whenever ProblemData is changed we create a new grammar with the necessary symbols
var g = new SimpleSymbolicExpressionGrammar();
g.AddSymbols(new[] { "+", "*", "%", "-" }, 2, 2); // % is protected division 1/0 := 0
foreach (var variableName in ProblemData.AllowedInputVariables)
g.AddTerminalSymbol(variableName);
// generate ephemeral random consts in the range [-10..+10[ (2*number of variables)
var rand = new System.Random();
for (int i = 0; i < ProblemData.AllowedInputVariables.Count() * 2; i++) {
string newErcSy;
do {
newErcSy = string.Format("{0:F2}", rand.NextDouble() * 20 - 10);
} while (g.Symbols.Any(sy => sy.Name == newErcSy)); // it might happen that we generate the same constant twice
g.AddTerminalSymbol(newErcSy);
}
Encoding.GrammarParameter.ReadOnly = false;
Encoding.Grammar = g;
Encoding.GrammarParameter.ReadOnly = true;
}
#endregion
#region Import & Export
public void Load(IRegressionProblemData data) {
Name = data.Name;
Description = data.Description;
ProblemData = data;
}
public IRegressionProblemData Export() {
return ProblemData;
}
#endregion
}
}