#region License Information
/* HeuristicLab
* Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.Instances;
namespace HeuristicLab.Problems.DynamicalSystemsModelling {
// Lightweight immutable vector used to propagate gradients (forward-mode AD).
// Vector.Zero is a reference-compared sentinel meaning "no gradient yet"; all
// operators short-circuit on it so Zero can stand in for a vector of any length.
public class Vector {
  public readonly static Vector Zero = new Vector(new double[0]);

  public static Vector operator +(Vector a, Vector b) {
    if (a == Zero) return b;
    if (b == Zero) return a;
    Debug.Assert(a.arr.Length == b.arr.Length);
    var res = new double[a.arr.Length];
    for (int i = 0; i < res.Length; i++)
      res[i] = a.arr[i] + b.arr[i];
    return new Vector(res);
  }

  public static Vector operator -(Vector a, Vector b) {
    if (b == Zero) return a;
    if (a == Zero) return -b;
    Debug.Assert(a.arr.Length == b.arr.Length);
    var res = new double[a.arr.Length];
    for (int i = 0; i < res.Length; i++)
      res[i] = a.arr[i] - b.arr[i];
    return new Vector(res);
  }

  public static Vector operator -(Vector v) {
    if (v == Zero) return Zero;
    // BUGFIX: negate into a fresh array. The old code negated v.arr in place and
    // returned v, destructively modifying the operand (e.g. "Zero - b" mutated b).
    var res = new double[v.arr.Length];
    for (int i = 0; i < res.Length; i++)
      res[i] = -v.arr[i];
    return new Vector(res);
  }

  public static Vector operator *(double s, Vector v) {
    if (v == Zero) return Zero;
    if (s == 0.0) return Zero;
    var res = new double[v.arr.Length];
    for (int i = 0; i < res.Length; i++)
      res[i] = s * v.arr[i];
    return new Vector(res);
  }

  public static Vector operator *(Vector v, double s) {
    return s * v;
  }

  // Element-wise division of a scalar by a vector: res[i] = s / v[i].
  public static Vector operator /(double s, Vector v) {
    if (s == 0.0) return Zero;
    if (v == Zero) throw new ArgumentException("Division by zero vector");
    var res = new double[v.arr.Length];
    for (int i = 0; i < res.Length; i++)
      res[i] = s / v.arr[i]; // BUGFIX: was 1.0 / v.arr[i], ignoring s entirely
    return new Vector(res);
  }

  public static Vector operator /(Vector v, double s) {
    // BUGFIX: was "v * 1.0 / s" which parses as (v * 1.0) / s and recurses into
    // this same operator forever (StackOverflowException). Parenthesize so the
    // scalar division happens first and the result is computed via operator *.
    return v * (1.0 / s);
  }

  private readonly double[] arr; // backing array; not copied by the constructor

  public Vector(double[] v) {
    this.arr = v;
  }

  // Copies all elements into the beginning of target (target may be longer).
  public void CopyTo(double[] target) {
    Debug.Assert(arr.Length <= target.Length);
    Array.Copy(arr, target, arr.Length);
  }
}
[Item("Dynamical Systems Modelling Problem", "TODO")]
[Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
[StorableClass]
// NOTE(review): generic type arguments below were missing in the reviewed revision
// (e.g. "Dictionary", "IEnumerable>") and have been restored from usage so the
// file compiles; no statement logic was changed.
public sealed class Problem : SymbolicExpressionTreeProblem, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
  #region parameter names
  private const string ProblemDataParameterName = "ProblemData";
  #endregion

  #region Parameter Properties
  IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }
  public IValueParameter<IRegressionProblemData> ProblemDataParameter {
    get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
  }
  #endregion

  #region Properties
  public IRegressionProblemData ProblemData {
    get { return ProblemDataParameter.Value; }
    set { ProblemDataParameter.Value = value; }
  }
  IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }
  #endregion

  public event EventHandler ProblemDataChanged;

  public override bool Maximization {
    get { return false; } // we minimize NMSE
  }

  #region item cloning and persistence
  // persistence
  [StorableConstructor]
  private Problem(bool deserializing) : base(deserializing) { }
  [StorableHook(HookType.AfterDeserialization)]
  private void AfterDeserialization() {
    RegisterEventHandlers();
  }

  // cloning
  private Problem(Problem original, Cloner cloner)
    : base(original, cloner) {
    RegisterEventHandlers();
  }
  public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
  #endregion

  public Problem()
    : base() {
    Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system", new RegressionProblemData()));

    // TODO: support multiple target variables

    var g = new SimpleSymbolicExpressionGrammar(); // empty grammar is replaced in UpdateGrammar()
    base.Encoding = new SymbolicExpressionTreeEncoding(g, 10, 5); // small for testing

    UpdateGrammar();
    RegisterEventHandlers();
  }

  /// <summary>
  /// Evaluates a candidate model: the numeric parameters (θ-nodes) of the tree are
  /// fitted with L-BFGS and the resulting minimized MSE over the training partition
  /// is returned (lower is better, see Maximization).
  /// </summary>
  public override double Evaluate(ISymbolicExpressionTree tree, IRandom random) {
    var problemData = ProblemData;
    var rows = ProblemData.TrainingIndices.ToArray();
    var target = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

    // assign a parameter index to each θ-node of the tree (prefix order)
    var nodeIdx = new Dictionary<ISymbolicExpressionTreeNode, int>();
    foreach (var node in tree.Root.IterateNodesPrefix().Where(n => IsConstantNode(n))) {
      nodeIdx.Add(node, nodeIdx.Count);
    }

    var theta = nodeIdx.Select(_ => random.NextDouble() * 2.0 - 1.0).ToArray(); // init params randomly from Unif(-1,1)

    double[] optTheta = new double[0];
    if (theta.Length > 0) {
      alglib.minlbfgsstate state;
      alglib.minlbfgsreport report;
      alglib.minlbfgscreate(Math.Min(theta.Length, 5), theta, out state);
      alglib.minlbfgssetcond(state, 0.0, 0.0, 0.0, 100);
      alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null, new object[] { tree, problemData, nodeIdx });
      alglib.minlbfgsresults(state, out optTheta, out report);

      /*
       * L-BFGS algorithm results
       INPUT PARAMETERS:
       State - algorithm state
       OUTPUT PARAMETERS:
       X - array[0..N-1], solution
       Rep - optimization report:
       * Rep.TerminationType completion code:
       * -7 gradient verification failed.
       See MinLBFGSSetGradientCheck() for more information.
       * -2 rounding errors prevent further improvement.
       X contains best point found.
       * -1 incorrect parameters were specified
       * 1 relative function improvement is no more than
       EpsF.
       * 2 relative step is no more than EpsX.
       * 4 gradient norm is no more than EpsG
       * 5 MaxIts steps was taken
       * 7 stopping conditions are too stringent,
       further improvement is impossible
       * Rep.IterationsCount contains iterations count
       * NFEV contains number of function calculations
       */
      if (report.terminationtype < 0) return double.MaxValue;
    }

    // perform evaluation for optimal theta to get quality value
    double[] grad = new double[optTheta.Length];
    double optQuality = double.NaN;
    EvaluateObjectiveAndGradient(optTheta, ref optQuality, grad, new object[] { tree, problemData, nodeIdx });
    if (double.IsNaN(optQuality) || double.IsInfinity(optQuality)) return 10E6; // return a large value (TODO: be consistent by using NMSE)

    // TODO: write back values
    return optQuality;
  }

  // ALGLIB gradient callback: computes MSE of the integrated trajectory (f) and
  // its gradient w.r.t. the θ-parameters (grad) for parameter vector x.
  // obj packs { tree, problemData, nodeIdx } (see the calls in Evaluate).
  private static void EvaluateObjectiveAndGradient(double[] x, ref double f, double[] grad, object obj) {
    var tree = (ISymbolicExpressionTree)((object[])obj)[0];
    var problemData = (IRegressionProblemData)((object[])obj)[1];
    var nodeIdx = (Dictionary<ISymbolicExpressionTreeNode, int>)((object[])obj)[2];

    var predicted = Integrate(
      new[] { tree }, // we assume tree contains an expression for the change of the target variable over time y'(t)
      problemData.Dataset,
      problemData.AllowedInputVariables.ToArray(),
      new[] { problemData.TargetVariable },
      problemData.TrainingIndices,
      nodeIdx,
      x).ToArray();

    // objective function is MSE; the gradient is accumulated alongside via Vector
    f = 0.0;
    int n = predicted.Length;
    double invN = 1.0 / n;
    var g = Vector.Zero;
    foreach (var pair in predicted.Zip(problemData.TargetVariableTrainingValues, Tuple.Create)) {
      var y_pred = pair.Item1;
      var y = pair.Item2;

      var res = (y - y_pred.Item1);
      var ressq = res * res;
      f += ressq * invN;
      g += -2.0 * res * y_pred.Item2 * invN; // d(res²)/dθ = -2·res·dy_pred/dθ
    }
    g.CopyTo(grad);
  }

  /// <summary>
  /// Integrates the ODE system given by trees forward in time over rows using
  /// explicit Euler (NUM_STEPS sub-steps per row). Yields, per time step and
  /// target variable, the predicted value together with its gradient w.r.t. the
  /// θ-parameters (forward-mode AD via Vector).
  /// </summary>
  private static IEnumerable<Tuple<double, Vector>> Integrate(
    ISymbolicExpressionTree[] trees, IDataset dataset, string[] inputVariables, string[] targetVariables, IEnumerable<int> rows,
    Dictionary<ISymbolicExpressionTreeNode, int> nodeIdx, double[] parameterValues) {

    int NUM_STEPS = 1;
    double h = 1.0 / NUM_STEPS;

    // return first value as stored in the dataset
    yield return Tuple.Create(dataset.GetDoubleValue(targetVariables.First(), rows.First()), Vector.Zero);

    // integrate forward starting with known values for the target in t0
    var variableValues = new Dictionary<string, Tuple<double, Vector>>();
    var t0 = rows.First();
    foreach (var varName in inputVariables) {
      variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
    }
    foreach (var varName in targetVariables) {
      variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
    }

    foreach (var t in rows.Skip(1)) {
      for (int step = 0; step < NUM_STEPS; step++) {
        var deltaValues = new Dictionary<string, Tuple<double, Vector>>();
        foreach (var tup in trees.Zip(targetVariables, Tuple.Create)) {
          var tree = tup.Item1;
          var targetVarName = tup.Item2;
          // skip programRoot and startSymbol
          var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), variableValues, nodeIdx, parameterValues);
          deltaValues.Add(targetVarName, res);
        }

        // update variableValues for next step (Euler: y ← y + h·y')
        foreach (var kvp in deltaValues) {
          var oldVal = variableValues[kvp.Key];
          variableValues[kvp.Key] = Tuple.Create(
            oldVal.Item1 + h * kvp.Value.Item1,
            oldVal.Item2 + h * kvp.Value.Item2
          );
        }
      }

      // yield target values
      foreach (var varName in targetVariables) {
        yield return variableValues[varName];
      }

      // update for next time step: inputs are read from the dataset, not integrated
      foreach (var varName in inputVariables) {
        variableValues[varName] = Tuple.Create(dataset.GetDoubleValue(varName, t), Vector.Zero);
      }
    }
  }

  // Recursive interpreter; returns (value, gradient w.r.t. θ) for the subtree.
  // The gradient is propagated with the usual forward-mode differentiation rules.
  private static Tuple<double, Vector> InterpretRec(
    ISymbolicExpressionTreeNode node,
    Dictionary<string, Tuple<double, Vector>> variableValues,
    Dictionary<ISymbolicExpressionTreeNode, int> nodeIdx,
    double[] parameterValues
      ) {
    switch (node.Symbol.Name) {
      case "+": {
          var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues);
          var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);
          return Tuple.Create(l.Item1 + r.Item1, l.Item2 + r.Item2);
        }
      case "*": {
          var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues);
          var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);
          // product rule: (f·g)' = f'·g + f·g'
          return Tuple.Create(l.Item1 * r.Item1, l.Item2 * r.Item1 + l.Item1 * r.Item2);
        }
      case "-": {
          var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues);
          var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);
          return Tuple.Create(l.Item1 - r.Item1, l.Item2 - r.Item2);
        }
      case "%": {
          var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues);
          var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);

          // protected division: x % 0 := 0 (by convention, with zero gradient)
          if (r.Item1.IsAlmost(0.0)) {
            return Tuple.Create(0.0, Vector.Zero);
          } else {
            return Tuple.Create(
              l.Item1 / r.Item1,
              l.Item1 * -1.0 / (r.Item1 * r.Item1) * r.Item2 + 1.0 / r.Item1 * l.Item2 // (f/g)' = f * (1/g)' + 1/g * f' = f * -1/g² * g' + 1/g * f'
            );
          }
        }
      default: {
          // distinguish other cases
          if (IsConstantNode(node)) {
            // θ-node: value is the parameter, gradient is the corresponding unit vector
            var vArr = new double[parameterValues.Length]; // backing array for vector
            vArr[nodeIdx[node]] = 1.0;
            var g = new Vector(vArr);
            return Tuple.Create(parameterValues[nodeIdx[node]], g);
          } else {
            // assume a variable name
            var varName = node.Symbol.Name;
            return variableValues[varName];
          }
        }
    }
  }

  #region events
  private void RegisterEventHandlers() {
    ProblemDataParameter.ValueChanged += new EventHandler(ProblemDataParameter_ValueChanged);
    if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += new EventHandler(ProblemData_Changed);
  }

  private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
    ProblemDataParameter.Value.Changed += new EventHandler(ProblemData_Changed);
    OnProblemDataChanged();
    OnReset();
  }

  private void ProblemData_Changed(object sender, EventArgs e) {
    OnReset();
  }

  private void OnProblemDataChanged() {
    UpdateGrammar();

    var handler = ProblemDataChanged;
    if (handler != null) handler(this, EventArgs.Empty);
  }

  private void UpdateGrammar() {
    // whenever ProblemData is changed we create a new grammar with the necessary symbols
    var g = new SimpleSymbolicExpressionGrammar();
    g.AddSymbols(new[] {
      "+",
      "*",
      // "%", // % is protected division 1/0 := 0 // removed for testing
      "-",
    }, 2, 2);

    // TODO
    //g.AddSymbols(new[] {
    //  "exp",
    //  "log", // log( ) // TODO: init a theta to ensure the value is always positive
    //  "exp_minus" // exp((-1) *
    //}, 1, 1);

    foreach (var variableName in ProblemData.AllowedInputVariables)
      g.AddTerminalSymbol(variableName);
    foreach (var variableName in new string[] { ProblemData.TargetVariable }) // TODO: multiple target variables
      g.AddTerminalSymbol(variableName);

    // generate symbols for numeric parameters for which the value is optimized using AutoDiff
    // we generate multiple symbols to balance the probability for selecting a numeric parameter in the generation of random trees
    var numericConstantsFactor = 2.0;
    for (int i = 0; i < numericConstantsFactor * (ProblemData.AllowedInputVariables.Count() + 1); i++) {
      g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff
    }
    Encoding.Grammar = g;
  }
  #endregion

  #region Import & Export
  public void Load(IRegressionProblemData data) {
    Name = data.Name;
    Description = data.Description;
    ProblemData = data;
  }

  public IRegressionProblemData Export() {
    return ProblemData;
  }
  #endregion

  #region helper
  // θ-nodes (numeric parameters to be optimized) are identified by their symbol name prefix.
  private static bool IsConstantNode(ISymbolicExpressionTreeNode n) {
    return n.Symbol.Name.StartsWith("θ");
  }
  #endregion
}
}