#region License Information
/* HeuristicLab
* Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Collections;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.Instances;
using Variable = HeuristicLab.Problems.DataAnalysis.Symbolic.Variable;
namespace HeuristicLab.Problems.DynamicalSystemsModelling {
// Another possibility is spline smoothing of the data (over time) to obtain, for each target variable,
// a de-noised value and the derivative dy/dt for all observations.
// The regression can then be applied directly to dy/dt (with the de-noised y values as inputs).
[Item("Dynamical Systems Modelling Problem", "TODO")]
[Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
[StorableClass]
public sealed class Problem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
#region parameter names
private const string ProblemDataParameterName = "Data";
private const string TargetVariablesParameterName = "Target variables";
private const string FunctionSetParameterName = "Function set";
private const string MaximumLengthParameterName = "Size limit";
private const string MaximumParameterOptimizationIterationsParameterName = "Max. parameter optimization iterations";
private const string NumberOfLatentVariablesParameterName = "Number of latent variables";
private const string NumericIntegrationStepsParameterName = "Steps for numeric integration";
private const string TrainingEpisodesParameterName = "Training episodes";
private const string OptimizeParametersForEpisodesParameterName = "Optimize parameters for episodes";
private const string OdeSolverParameterName = "ODE Solver";
#endregion
#region Parameter Properties
IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }
public IValueParameter<IRegressionProblemData> ProblemDataParameter {
get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
}
public IValueParameter<ReadOnlyCheckedItemList<StringValue>> TargetVariablesParameter {
get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[TargetVariablesParameterName]; }
}
public IValueParameter<ReadOnlyCheckedItemList<StringValue>> FunctionSetParameter {
get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[FunctionSetParameterName]; }
}
public IFixedValueParameter<IntValue> MaximumLengthParameter {
get { return (IFixedValueParameter<IntValue>)Parameters[MaximumLengthParameterName]; }
}
public IFixedValueParameter<IntValue> MaximumParameterOptimizationIterationsParameter {
get { return (IFixedValueParameter<IntValue>)Parameters[MaximumParameterOptimizationIterationsParameterName]; }
}
public IFixedValueParameter<IntValue> NumberOfLatentVariablesParameter {
get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfLatentVariablesParameterName]; }
}
public IFixedValueParameter<IntValue> NumericIntegrationStepsParameter {
get { return (IFixedValueParameter<IntValue>)Parameters[NumericIntegrationStepsParameterName]; }
}
public IValueParameter<ItemList<IntRange>> TrainingEpisodesParameter {
get { return (IValueParameter<ItemList<IntRange>>)Parameters[TrainingEpisodesParameterName]; }
}
public IFixedValueParameter<BoolValue> OptimizeParametersForEpisodesParameter {
get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersForEpisodesParameterName]; }
}
public IConstrainedValueParameter<StringValue> OdeSolverParameter {
get { return (IConstrainedValueParameter<StringValue>)Parameters[OdeSolverParameterName]; }
}
#endregion
#region Properties
public IRegressionProblemData ProblemData {
get { return ProblemDataParameter.Value; }
set { ProblemDataParameter.Value = value; }
}
IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }
public ReadOnlyCheckedItemList<StringValue> TargetVariables {
get { return TargetVariablesParameter.Value; }
}
public ReadOnlyCheckedItemList<StringValue> FunctionSet {
get { return FunctionSetParameter.Value; }
}
public int MaximumLength {
get { return MaximumLengthParameter.Value.Value; }
}
public int MaximumParameterOptimizationIterations {
get { return MaximumParameterOptimizationIterationsParameter.Value.Value; }
}
public int NumberOfLatentVariables {
get { return NumberOfLatentVariablesParameter.Value.Value; }
}
public int NumericIntegrationSteps {
get { return NumericIntegrationStepsParameter.Value.Value; }
}
public IEnumerable<IntRange> TrainingEpisodes {
get { return TrainingEpisodesParameter.Value; }
}
public bool OptimizeParametersForEpisodes {
get { return OptimizeParametersForEpisodesParameter.Value.Value; }
}
public string OdeSolver {
get { return OdeSolverParameter.Value.Value; }
set {
var matchingValue = OdeSolverParameter.ValidValues.FirstOrDefault(v => v.Value == value);
if (matchingValue == null) throw new ArgumentOutOfRangeException();
else OdeSolverParameter.Value = matchingValue;
}
}
#endregion
public event EventHandler ProblemDataChanged;
public override bool Maximization {
get { return false; } // we minimize NMSE
}
#region item cloning and persistence
// persistence
[StorableConstructor]
private Problem(bool deserializing) : base(deserializing) { }
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
if (!Parameters.ContainsKey(OptimizeParametersForEpisodesParameterName)) {
Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
}
RegisterEventHandlers();
}
// cloning
private Problem(Problem original, Cloner cloner)
: base(original, cloner) {
RegisterEventHandlers();
}
public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
#endregion
public Problem()
: base() {
var targetVariables = new CheckedItemList<StringValue>().AsReadOnly(); // HACK: it would be better to provide a new class derived from IDataAnalysisProblem
var functions = CreateFunctionSet();
Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system. Use CSV import functionality to import data.", new RegressionProblemData()));
Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(TargetVariablesParameterName, "Target variables (overrides setting in ProblemData)", targetVariables));
Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(FunctionSetParameterName, "The list of allowed functions", functions));
Parameters.Add(new FixedValueParameter<IntValue>(MaximumLengthParameterName, "The maximally allowed length of each expression. Set to a small value (5 - 25). Default = 10", new IntValue(10)));
Parameters.Add(new FixedValueParameter<IntValue>(MaximumParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters (using L-BFGS). More iterations make the algorithm slower, fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(100)));
Parameters.Add(new FixedValueParameter<IntValue>(NumberOfLatentVariablesParameterName, "Latent variables (unobserved variables) allow us to produce expressions which are integrated up and can be used in other expressions. They are handled similarly to target variables in forward simulation / integration. The difference to target variables is that there are no data to which the calculated values of latent variables are compared. Set to a small value (0 .. 5) as necessary (default = 0)", new IntValue(0)));
Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps make the algorithm slower, fewer steps worsen the accuracy of the numeric integration scheme.", new IntValue(10)));
Parameters.Add(new ValueParameter<ItemList<IntRange>>(TrainingEpisodesParameterName, "A list of ranges that should be used for training, each range represents an independent episode. This overrides the TrainingSet parameter in ProblemData.", new ItemList<IntRange>()));
Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
var solversStr = new string[] { "HeuristicLab", "CVODES" };
var solvers = new ItemSet<StringValue>(
solversStr.Select(s => new StringValue(s).AsReadOnly())
);
Parameters.Add(new ConstrainedValueParameter<StringValue>(OdeSolverParameterName, "The solver to use for solving the initial value ODE problems", solvers, solvers.First()));
RegisterEventHandlers();
InitAllParameters();
// TODO: do not clear selection of target variables when the input variables are changed (keep selected target variables)
// TODO: UI hangs when selecting / deselecting input variables because the encoding is updated on each item
// TODO: use training range as default training episode
// TODO: write back optimized parameters to solution?
// TODO: optimization of starting values for latent variables in CVODES solver
}
public override double Evaluate(Individual individual, IRandom random) {
var trees = individual.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual
// write back optimized parameters to tree nodes instead of the separate OptTheta variable
// retrieve optimized parameters from nodes?
if (OptimizeParametersForEpisodes) {
int eIdx = 0;
double totalNMSE = 0.0;
int totalSize = 0;
foreach (var episode in TrainingEpisodes) {
double[] optTheta;
double nmse;
OptimizeForEpisodes(trees, random, new[] { episode }, out optTheta, out nmse);
individual["OptTheta_" + eIdx] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
eIdx++;
totalNMSE += nmse * episode.Size;
totalSize += episode.Size;
}
return totalNMSE / totalSize;
} else {
double[] optTheta;
double nmse;
OptimizeForEpisodes(trees, random, TrainingEpisodes, out optTheta, out nmse);
individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
return nmse;
}
}
private void OptimizeForEpisodes(
ISymbolicExpressionTree[] trees,
IRandom random,
IEnumerable<IntRange> episodes,
out double[] optTheta,
out double nmse) {
var rows = episodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start)).ToArray();
var problemData = ProblemData;
var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
var targetValues = new double[rows.Length, targetVars.Length];
// collect values of all target variables
var colIdx = 0;
foreach (var targetVar in targetVars) {
int rowIdx = 0;
foreach (var value in problemData.Dataset.GetDoubleValues(targetVar, rows)) {
targetValues[rowIdx, colIdx] = value;
rowIdx++;
}
colIdx++;
}
// NOTE: the order of values in the parameter vector matches the prefix order of constant nodes in the trees
var paramNodes = new List<ISymbolicExpressionTreeNode>();
foreach (var t in trees) {
foreach (var n in t.IterateNodesPrefix()) {
if (IsConstantNode(n))
paramNodes.Add(n);
}
}
// init params randomly
// theta contains parameter values for trees and then the initial values for latent variables (a separate vector for each episode)
// initial values for latent variables are also optimized
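// Layout sketch for theta (P = number of constant nodes over all trees, L = number of latent variables,
// E = number of episodes); this matches the indexing used in Integrate below:
//   theta = [ p_0 .. p_{P-1} | episode 0: λ1 .. λL | episode 1: λ1 .. λL | ... | episode E-1: λ1 .. λL ]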
var theta = new double[paramNodes.Count + latentVariables.Length * episodes.Count()];
for (int i = 0; i < theta.Length; i++)
theta[i] = random.NextDouble() * 2.0 - 1.0;
optTheta = new double[0];
if (theta.Length > 0) {
alglib.minlbfgsstate state;
alglib.minlbfgsreport report;
alglib.minlbfgscreate(Math.Min(theta.Length, 5), theta, out state);
alglib.minlbfgssetcond(state, 0.0, 0.0, 0.0, MaximumParameterOptimizationIterations);
//alglib.minlbfgssetgradientcheck(state, 1e-6);
alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null,
new object[] { trees, targetVars, problemData, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver }); //TODO: create a type
alglib.minlbfgsresults(state, out optTheta, out report);
/* L-BFGS algorithm results (from the ALGLIB documentation):
 *
 * INPUT PARAMETERS:
 *   State - algorithm state
 * OUTPUT PARAMETERS:
 *   X   - array[0..N-1], solution
 *   Rep - optimization report:
 *     * Rep.TerminationType completion code:
 *       * -7 gradient verification failed. See MinLBFGSSetGradientCheck() for more information.
 *       * -2 rounding errors prevent further improvement. X contains best point found.
 *       * -1 incorrect parameters were specified
 *       *  1 relative function improvement is no more than EpsF.
 *       *  2 relative step is no more than EpsX.
 *       *  4 gradient norm is no more than EpsG
 *       *  5 MaxIts steps were taken
 *       *  7 stopping conditions are too stringent, further improvement is impossible
 *     * Rep.IterationsCount contains iterations count
 *     * NFEV contains number of function calculations
 */
if (report.terminationtype < 0) { nmse = 10E6; return; }
}
// perform evaluation for optimal theta to get quality value
double[] grad = new double[optTheta.Length];
nmse = double.NaN;
EvaluateObjectiveAndGradient(optTheta, ref nmse, grad,
new object[] { trees, targetVars, problemData, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver });
if (double.IsNaN(nmse) || double.IsInfinity(nmse)) { nmse = 10E6; return; } // return a large value (TODO: be consistent by using NMSE)
}
private static void EvaluateObjectiveAndGradient(double[] x, ref double f, double[] grad, object obj) {
var trees = (ISymbolicExpressionTree[])((object[])obj)[0];
var targetVariables = (string[])((object[])obj)[1];
var problemData = (IRegressionProblemData)((object[])obj)[2];
var targetValues = (double[,])((object[])obj)[3];
var episodes = (IntRange[])((object[])obj)[4];
var numericIntegrationSteps = (int)((object[])obj)[5];
var latentVariables = (string[])((object[])obj)[6];
var odeSolver = (string)((object[])obj)[7];
Tuple<double, Vector>[][] predicted = null; // one array of predictions for each episode
// TODO: stop integration early for diverging solutions
predicted = Integrate(
trees, // we assume trees contain expressions for the change of each target variable over time y'(t)
problemData.Dataset,
problemData.AllowedInputVariables.ToArray(),
targetVariables,
latentVariables,
episodes,
x,
odeSolver,
numericIntegrationSteps).ToArray();
if (predicted.Length != targetValues.GetLength(0)) {
f = double.MaxValue;
Array.Clear(grad, 0, grad.Length);
return;
}
// for normalized MSE = 1/variance(t) * MSE(t, pred)
// TODO: Perf. (by standardization of target variables before evaluation of all trees)
var invVar = Enumerable.Range(0, targetVariables.Length)
.Select(c => Enumerable.Range(0, targetValues.GetLength(0)).Select(row => targetValues[row, c])) // column vectors
.Select(vec => vec.Variance())
.Select(v => 1.0 / v)
.ToArray();
// objective function is NMSE
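// As a sketch of what the loop below computes (r ... row, c ... target variable, n ... number of predicted rows):
//   f(θ)  = (1/n) Σ_r Σ_c (y_rc - ŷ_rc(θ))² / Var(y_c)
//   ∇f(θ) = (1/n) Σ_r Σ_c -2 (y_rc - ŷ_rc(θ)) ∇ŷ_rc(θ) / Var(y_c)
// where ∇ŷ_rc(θ) is the gradient of the prediction w.r.t. θ as produced by the integration (Item2 of each prediction).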
f = 0.0;
int n = predicted.Length;
double invN = 1.0 / n;
var g = Vector.Zero;
int r = 0;
foreach (var y_pred in predicted) {
// y_pred contains the predicted values for target variables first and then predicted values for latent variables
for (int c = 0; c < targetVariables.Length; c++) {
var y_pred_f = y_pred[c].Item1;
var y = targetValues[r, c];
var res = (y - y_pred_f);
var ressq = res * res;
f += ressq * invN * invVar[c];
g += -2.0 * res * y_pred[c].Item2 * invN * invVar[c];
}
r++;
}
g.CopyTo(grad);
}
public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
base.Analyze(individuals, qualities, results, random);
if (!results.ContainsKey("Prediction (training)")) {
results.Add(new Result("Prediction (training)", typeof(ReadOnlyItemList)));
}
if (!results.ContainsKey("Prediction (test)")) {
results.Add(new Result("Prediction (test)", typeof(ReadOnlyItemList)));
}
if (!results.ContainsKey("Models")) {
results.Add(new Result("Models", typeof(VariableCollection)));
}
if(!results.ContainsKey("SNMSE")) {
results.Add(new Result("SNMSE", typeof(DoubleValue)));
}
var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual
results["SNMSE"].Value = new DoubleValue(bestIndividualAndQuality.Item2);
var problemData = ProblemData;
var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
var trainingList = new ItemList<DataTable>();
if (OptimizeParametersForEpisodes) {
var eIdx = 0;
var trainingPredictions = new List<Tuple<double, Vector>[][]>();
foreach (var episode in TrainingEpisodes) {
var episodes = new[] { episode };
var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta_" + eIdx]).ToArray(); // see evaluate
var trainingPrediction = Integrate(
trees, // we assume trees contain expressions for the change of each target variable over time y'(t)
problemData.Dataset,
problemData.AllowedInputVariables.ToArray(),
targetVars,
latentVariables,
episodes,
optTheta,
OdeSolver,
NumericIntegrationSteps).ToArray();
trainingPredictions.Add(trainingPrediction);
eIdx++;
}
// only for target values
var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
var targetVar = targetVars[colIdx];
var trainingDataTable = new DataTable(targetVar + " prediction (training)");
var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPredictions.SelectMany(arr => arr.Select(row => row[colIdx].Item1)).ToArray());
trainingDataTable.Rows.Add(actualValuesRow);
trainingDataTable.Rows.Add(predictedValuesRow);
trainingList.Add(trainingDataTable);
}
results["Prediction (training)"].Value = trainingList.AsReadOnly();
var models = new VariableCollection();
foreach (var tup in targetVars.Zip(trees, Tuple.Create)) {
var targetVarName = tup.Item1;
var tree = tup.Item2;
var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
models.Add(origTreeVar);
}
results["Models"].Value = models;
} else {
var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta"]).ToArray(); // see evaluate
var trainingPrediction = Integrate(
trees, // we assume trees contain expressions for the change of each target variable over time dy/dt
problemData.Dataset,
problemData.AllowedInputVariables.ToArray(),
targetVars,
latentVariables,
TrainingEpisodes,
optTheta,
OdeSolver,
NumericIntegrationSteps).ToArray();
// for target values and latent variables
var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
// is target variable
if (colIdx < targetVars.Length) {
var targetVar = targetVars[colIdx];
var trainingDataTable = new DataTable(targetVar + " prediction (training)");
var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray());
trainingDataTable.Rows.Add(actualValuesRow);
trainingDataTable.Rows.Add(predictedValuesRow);
trainingList.Add(trainingDataTable);
} else {
var latentVar = latentVariables[colIdx - targetVars.Length];
var trainingDataTable = new DataTable(latentVar + " prediction (training)");
var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray());
var emptyRow = new DataRow(latentVar);
trainingDataTable.Rows.Add(emptyRow);
trainingDataTable.Rows.Add(predictedValuesRow);
trainingList.Add(trainingDataTable);
}
}
// TODO: DRY for training and test
var testList = new ItemList<DataTable>();
var testRows = ProblemData.TestIndices.ToArray();
var testPrediction = Integrate(
trees, // we assume trees contain expressions for the change of each target variable over time y'(t)
problemData.Dataset,
problemData.AllowedInputVariables.ToArray(),
targetVars,
latentVariables,
new IntRange[] { ProblemData.TestPartition },
optTheta,
OdeSolver,
NumericIntegrationSteps).ToArray();
for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
// is target variable
if (colIdx < targetVars.Length) {
var targetVar = targetVars[colIdx];
var testDataTable = new DataTable(targetVar + " prediction (test)");
var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
testDataTable.Rows.Add(actualValuesRow);
testDataTable.Rows.Add(predictedValuesRow);
testList.Add(testDataTable);
} else {
var latentVar = latentVariables[colIdx - targetVars.Length];
var testDataTable = new DataTable(latentVar + " prediction (test)");
var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
var emptyRow = new DataRow(latentVar);
testDataTable.Rows.Add(emptyRow);
testDataTable.Rows.Add(predictedValuesRow);
testList.Add(testDataTable);
}
}
results["Prediction (training)"].Value = trainingList.AsReadOnly();
results["Prediction (test)"].Value = testList.AsReadOnly();
#region simplification of models
// TODO the dependency of HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal
var models = new VariableCollection(); // to store target var names and original version of tree
int nextParIdx = 0;
for (int idx = 0; idx < trees.Length; idx++) {
var varName = string.Empty;
if (idx < targetVars.Length) {
varName = targetVars[idx];
} else {
varName = latentVariables[idx - targetVars.Length];
}
var tree = trees[idx];
// when we reference HeuristicLab.Problems.DataAnalysis.Symbolic we can translate symbols
var shownTree = new SymbolicExpressionTree(TranslateTreeNode(tree.Root, optTheta.ToArray(),
ref nextParIdx));
// var shownTree = (SymbolicExpressionTree)tree.Clone();
// var constantsNodeOrig = tree.IterateNodesPrefix().Where(IsConstantNode);
// var constantsNodeShown = shownTree.IterateNodesPrefix().Where(IsConstantNode);
//
// foreach (var n in constantsNodeOrig.Zip(constantsNodeShown, (original, shown) => new { original, shown })) {
// double constantsVal = optTheta[nodeIdx[n.original]];
//
// ConstantTreeNode replacementNode = new ConstantTreeNode(new Constant()) { Value = constantsVal };
//
// var parentNode = n.shown.Parent;
// int replacementIndex = parentNode.IndexOfSubtree(n.shown);
// parentNode.RemoveSubtree(replacementIndex);
// parentNode.InsertSubtree(replacementIndex, replacementNode);
// }
var origTreeVar = new HeuristicLab.Core.Variable(varName + "(original)");
origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
models.Add(origTreeVar);
var simplifiedTreeVar = new HeuristicLab.Core.Variable(varName + "(simplified)");
simplifiedTreeVar.Value = TreeSimplifier.Simplify(shownTree);
models.Add(simplifiedTreeVar);
}
results["Models"].Value = models;
#endregion
}
}
#region interpretation
// the following uses auto-diff to calculate the gradient w.r.t. the parameters forward in time.
// this is basically the method described in Gronwall T. Note on the derivatives with respect to a parameter of the solutions of a system of differential equations. Ann. Math. 1919;20:292–296.
// a comparison of three potential calculation methods for the gradient is given in:
// Sengupta, B., Friston, K. J., & Penny, W. D. (2014). Efficient gradient computation for dynamical models. Neuroimage, 98(100), 521–527. http://doi.org/10.1016/j.neuroimage.2014.04.040
// "Our comparison establishes that the adjoint method is computationally more efficient for numerical estimation of parametric gradients
// for state-space models — both linear and non-linear, as in the case of a dynamical causal model (DCM)"
// for a solver with the necessary features see: https://computation.llnl.gov/projects/sundials/cvodes
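// Sketch of the forward sensitivity equations used here: for the ODE y'(t) = f(y(t), p) the sensitivities
// s_i(t) = ∂y(t)/∂p_i satisfy the auxiliary linear ODE
//   s_i'(t) = (∂f/∂y) s_i(t) + ∂f/∂p_i,   with s_i(t0) = ∂y(t0)/∂p_i,
// which is integrated alongside the state equations (see CreateSensitivityRhs for the CVODES variant).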
private static IEnumerable<Tuple<double, Vector>[]> Integrate(
ISymbolicExpressionTree[] trees, IDataset dataset,
string[] inputVariables, string[] targetVariables, string[] latentVariables, IEnumerable<IntRange> episodes,
double[] parameterValues,
string odeSolver, int numericIntegrationSteps = 100) {
// TODO: numericIntegrationSteps is only relevant for the HeuristicLab solver
var episodeIdx = 0;
foreach (var episode in episodes) {
var rows = Enumerable.Range(episode.Start, episode.End - episode.Start);
// integrate forward starting with known values for the target in t0
var variableValues = new Dictionary<string, Tuple<double, Vector>>();
var t0 = rows.First();
foreach (var varName in inputVariables) {
variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
}
foreach (var varName in targetVariables) {
variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
}
// add value entries for latent variables which are also integrated
// initial values are at the end of the parameter vector
// separate initial values for each episode
var initialValueIdx = parameterValues.Length - episodes.Count() * latentVariables.Length + episodeIdx * latentVariables.Length;
foreach (var latentVar in latentVariables) {
var arr = new double[parameterValues.Length]; // backing array
arr[initialValueIdx] = 1.0;
var g = new Vector(arr);
variableValues.Add(latentVar,
Tuple.Create(parameterValues[initialValueIdx], g)); // we don't have observations for latent variables therefore we optimize the initial value for each episode
initialValueIdx++;
}
var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must coincide with the order of trees in the encoding
// return first value as stored in the dataset
yield return calculatedVariables
.Select(calcVarName => variableValues[calcVarName])
.ToArray();
var prevT = rows.First(); // TODO: here we should use a variable for t if it is available. Right now we assume equidistant measurements.
foreach (var t in rows.Skip(1)) {
if (odeSolver == "HeuristicLab")
IntegrateHL(trees, calculatedVariables, variableValues, parameterValues, numericIntegrationSteps);
else if (odeSolver == "CVODES")
IntegrateCVODES(trees, calculatedVariables, variableValues, parameterValues, t - prevT);
else throw new InvalidOperationException("Unknown ODE solver " + odeSolver);
prevT = t;
// This check doesn't work with the HeuristicLab integrator if there are input variables
//if (variableValues.Count == targetVariables.Length) {
// only return the target variables for calculation of errors
var res = calculatedVariables
.Select(targetVar => variableValues[targetVar])
.ToArray();
if (res.Any(ri => double.IsNaN(ri.Item1) || double.IsInfinity(ri.Item1))) yield break;
yield return res;
//} else {
// yield break; // stop early on problems
//}
// update for next time step
foreach (var varName in inputVariables) {
variableValues[varName] = Tuple.Create(dataset.GetDoubleValue(varName, t), Vector.Zero);
}
}
episodeIdx++;
}
}
#region CVODES
/// <summary>
/// Here we use CVODES to solve the ODE. Forward sensitivities are used to calculate the gradient for parameter optimization.
/// </summary>
/// <param name="trees">Each equation in the ODE represented as a tree</param>
/// <param name="calculatedVariables">The names of the calculated variables</param>
/// <param name="variableValues">The start values of the calculated variables as well as their sensitivities over parameters</param>
/// <param name="parameterValues">The current parameter values</param>
/// <param name="t">The time t up to which we need to integrate.</param>
private static void IntegrateCVODES(
ISymbolicExpressionTree[] trees, // f(y,p) in tree representation
string[] calculatedVariables, // names of elements of y
Dictionary<string, Tuple<double, Vector>> variableValues, // y (input and output) input: y(t0), output: y(t0+t)
double[] parameterValues, // p
double t // duration t for which we want to integrate
) {
// the RHS of the ODE
// dy/dt = f(y_t,x_t,p)
CVODES.CVRhsFunc f = CreateOdeRhs(trees, calculatedVariables, parameterValues);
// the Jacobian ∂f/∂y
CVODES.CVDlsJacFunc jac = CreateJac(trees, calculatedVariables, parameterValues);
// the RHS for the forward sensitivities (∂f/∂y)s_i(t) + ∂f/∂p_i
CVODES.CVSensRhsFn sensF = CreateSensitivityRhs(trees, calculatedVariables, parameterValues);
// setup solver
int numberOfEquations = trees.Length;
IntPtr y = IntPtr.Zero;
IntPtr cvode_mem = IntPtr.Zero;
IntPtr A = IntPtr.Zero;
IntPtr yS0 = IntPtr.Zero;
IntPtr linearSolver = IntPtr.Zero;
var ns = parameterValues.Length; // number of parameters
try {
y = CVODES.N_VNew_Serial(numberOfEquations);
// init y to current values of variables
// y must be initialized before calling CVodeInit
for (int i = 0; i < calculatedVariables.Length; i++) {
CVODES.NV_Set_Ith_S(y, i, variableValues[calculatedVariables[i]].Item1);
}
cvode_mem = CVODES.CVodeCreate(CVODES.MultistepMethod.CV_ADAMS, CVODES.NonlinearSolverIteration.CV_FUNCTIONAL);
var flag = CVODES.CVodeInit(cvode_mem, f, 0.0, y);
Debug.Assert(CVODES.CV_SUCCESS == flag);
double relTol = 1.0e-2;
double absTol = 1.0;
flag = CVODES.CVodeSStolerances(cvode_mem, relTol, absTol); // TODO: probably need to adjust absTol per variable
Debug.Assert(CVODES.CV_SUCCESS == flag);
A = CVODES.SUNDenseMatrix(numberOfEquations, numberOfEquations);
Debug.Assert(A != IntPtr.Zero);
linearSolver = CVODES.SUNDenseLinearSolver(y, A);
Debug.Assert(linearSolver != IntPtr.Zero);
flag = CVODES.CVDlsSetLinearSolver(cvode_mem, linearSolver, A);
Debug.Assert(CVODES.CV_SUCCESS == flag);
flag = CVODES.CVDlsSetJacFn(cvode_mem, jac);
Debug.Assert(CVODES.CV_SUCCESS == flag);
yS0 = CVODES.N_VCloneVectorArray_Serial(ns, y); // clone the output vector for each parameter
unsafe {
// set to initial sensitivities supplied by caller
for (int pIdx = 0; pIdx < ns; pIdx++) {
var yS0_i = *((IntPtr*)yS0.ToPointer() + pIdx);
for (var varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
CVODES.NV_Set_Ith_S(yS0_i, varIdx, variableValues[calculatedVariables[varIdx]].Item2[pIdx]);
}
}
}
flag = CVODES.CVodeSensInit(cvode_mem, ns, CVODES.CV_SIMULTANEOUS, sensF, yS0);
Debug.Assert(CVODES.CV_SUCCESS == flag);
flag = CVODES.CVodeSensEEtolerances(cvode_mem);
Debug.Assert(CVODES.CV_SUCCESS == flag);
// make one forward integration step
double tout = 0.0; // first output time
flag = CVODES.CVode(cvode_mem, t, y, ref tout, CVODES.CV_NORMAL);
if (flag == CVODES.CV_SUCCESS) {
Debug.Assert(t == tout);
// get sensitivities
flag = CVODES.CVodeGetSens(cvode_mem, ref tout, yS0);
Debug.Assert(CVODES.CV_SUCCESS == flag);
// update variableValues based on integration results
for (int varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
var yi = CVODES.NV_Get_Ith_S(y, varIdx);
var gArr = new double[parameterValues.Length];
for (var pIdx = 0; pIdx < parameterValues.Length; pIdx++) {
unsafe {
var yS0_pi = *((IntPtr*)yS0.ToPointer() + pIdx);
gArr[pIdx] = CVODES.NV_Get_Ith_S(yS0_pi, varIdx);
}
}
variableValues[calculatedVariables[varIdx]] = Tuple.Create(yi, new Vector(gArr));
}
} else {
variableValues.Clear(); // indicate problems by not returning new values
}
// cleanup all allocated objects
} finally {
if (y != IntPtr.Zero) CVODES.N_VDestroy_Serial(y);
if (cvode_mem != IntPtr.Zero) CVODES.CVodeFree(ref cvode_mem);
if (linearSolver != IntPtr.Zero) CVODES.SUNLinSolFree(linearSolver);
if (A != IntPtr.Zero) CVODES.SUNMatDestroy(A);
if (yS0 != IntPtr.Zero) CVODES.N_VDestroyVectorArray_Serial(yS0, ns);
}
}
private static CVODES.CVRhsFunc CreateOdeRhs(
ISymbolicExpressionTree[] trees,
string[] calculatedVariables,
double[] parameterValues) {
// we don't need to calculate a gradient here
return (double t,
IntPtr y, // N_Vector, current value of y (input)
IntPtr ydot, // N_Vector, calculated value of y' (output)
IntPtr user_data // optional user data, (unused here)
) => {
// TODO: perf
var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
int pIdx = 0;
foreach (var tree in trees) {
foreach (var n in tree.IterateNodesPrefix()) {
if (IsConstantNode(n)) {
nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we do not need a gradient
pIdx++;
} else if (n.SubtreeCount == 0) {
// for variables and latent variables get the value from variableValues
var varName = n.Symbol.Name;
var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
if (varIdx < 0) throw new InvalidProgramException();
var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
nodeValues.Add(n, Tuple.Create(y_i, Vector.Zero)); // no gradient needed
}
}
}
for (int i = 0; i < trees.Length; i++) {
var tree = trees[i];
var res_i = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
CVODES.NV_Set_Ith_S(ydot, i, res_i.Item1);
}
return 0;
};
}
private static CVODES.CVDlsJacFunc CreateJac(
ISymbolicExpressionTree[] trees,
string[] calculatedVariables,
double[] parameterValues) {
return (
double t, // current time (input)
IntPtr y, // N_Vector, current value of y (input)
IntPtr fy, // N_Vector, current value of f (input)
IntPtr Jac, // SUNMatrix ∂f/∂y (output, row i contains the ∂f_i/∂y vector)
IntPtr user_data, // optional (unused here)
IntPtr tmp1, // N_Vector, optional (unused here)
IntPtr tmp2, // N_Vector, optional (unused here)
IntPtr tmp3 // N_Vector, optional (unused here)
) => {
// here we need to calculate partial derivatives for the calculated variables y
var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
int pIdx = 0;
foreach (var tree in trees) {
foreach (var n in tree.IterateNodesPrefix()) {
if (IsConstantNode(n)) {
nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we need a gradient over y which is zero for parameters
pIdx++;
} else if (n.SubtreeCount == 0) {
// for variables and latent variables we use values supplied in y and init gradient vectors accordingly
var varName = n.Symbol.Name;
var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
if (varIdx < 0) throw new InvalidProgramException();
var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
var gArr = new double[CVODES.NV_LENGTH_S(y)]; // backing array
gArr[varIdx] = 1.0;
var g = new Vector(gArr);
nodeValues.Add(n, Tuple.Create(y_i, g));
}
}
}
for (int i = 0; i < trees.Length; i++) {
var tree = trees[i];
var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
var g = res.Item2;
for (int j = 0; j < calculatedVariables.Length; j++) {
CVODES.SUNDenseMatrix_Set(Jac, i, j, g[j]);
}
}
return 0; // on success
};
}
// to calculate sensitivities RHS for all equations at once
// must compute (∂f/∂y)s_i(t) + ∂f/∂p_i and store in ySdot.
// Index i refers to parameters; the dimensionality of the matrix and vectors is the number of equations
private static CVODES.CVSensRhsFn CreateSensitivityRhs(ISymbolicExpressionTree[] trees, string[] calculatedVariables, double[] parameterValues) {
return (
int Ns, // number of parameters
double t, // current time
IntPtr y, // N_Vector y(t) (input)
IntPtr ydot, // N_Vector dy/dt(t) (input)
IntPtr yS, // N_Vector*, one vector for each parameter (input)
IntPtr ySdot, // N_Vector*, one vector for each parameter (output)
IntPtr user_data, // optional (unused here)
IntPtr tmp1, // N_Vector, optional (unused here)
IntPtr tmp2 // N_Vector, optional (unused here)
) => {
// here we need to calculate partial derivatives for the calculated variables y as well as for the parameters
var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
var d = calculatedVariables.Length + parameterValues.Length; // dimensionality of gradient
// first collect variable values
foreach (var tree in trees) {
foreach (var n in tree.IterateNodesPrefix()) {
if (IsVariableNode(n)) {
// for variables and latent variables we use values supplied in y and init gradient vectors accordingly
var varName = n.Symbol.Name;
var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
if (varIdx < 0) throw new InvalidProgramException();
var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
var gArr = new double[d]; // backing array
gArr[varIdx] = 1.0;
var g = new Vector(gArr);
nodeValues.Add(n, Tuple.Create(y_i, g));
}
}
}
// then collect constants
int pIdx = 0;
foreach (var tree in trees) {
foreach (var n in tree.IterateNodesPrefix()) {
if (IsConstantNode(n)) {
var gArr = new double[d];
gArr[calculatedVariables.Length + pIdx] = 1.0;
var g = new Vector(gArr);
nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], g));
pIdx++;
}
}
}
// gradient vector is [∂f/∂y_1, ∂f/∂y_2, ... ∂f/∂yN, ∂f/∂p_1 ... ∂f/∂p_K]
for (pIdx = 0; pIdx < Ns; pIdx++) {
unsafe {
var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
CVODES.N_VConst_Serial(0.0, sDot_pi);
}
}
for (int i = 0; i < trees.Length; i++) {
var tree = trees[i];
var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
var g = res.Item2;
// update ySdot = (∂f/∂y)s_i(t) + ∂f/∂p_i
for (pIdx = 0; pIdx < Ns; pIdx++) {
unsafe {
var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
var s_pi = *((IntPtr*)yS.ToPointer() + pIdx);
var v = CVODES.NV_Get_Ith_S(sDot_pi, i);
// (∂f/∂y)s_i(t)
var p = 0.0;
for (int yIdx = 0; yIdx < calculatedVariables.Length; yIdx++) {
p += g[yIdx] * CVODES.NV_Get_Ith_S(s_pi, yIdx);
}
// + ∂f/∂p_i
CVODES.NV_Set_Ith_S(sDot_pi, i, v + p + g[calculatedVariables.Length + pIdx]);
}
}
}
return 0; // on success
};
}
#endregion
private static void IntegrateHL(
ISymbolicExpressionTree[] trees,
string[] calculatedVariables, // names of integrated variables
Dictionary<string, Tuple<double, Vector>> variableValues, // y (input and output) input: y(t0), output: y(t0+1)
double[] parameterValues,
int numericIntegrationSteps) {
var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
// the nodeValues for parameters are constant over time
// TODO: this needs to be done only once for each iteration in gradient descent (whenever parameter values change)
// NOTE: the order must be the same as above (prefix order for nodes)
int pIdx = 0;
foreach (var tree in trees) {
foreach (var node in tree.Root.IterateNodesPrefix()) {
if (IsConstantNode(node)) {
var gArr = new double[parameterValues.Length]; // backing array
gArr[pIdx] = 1.0;
var g = new Vector(gArr);
nodeValues.Add(node, new Tuple<double, Vector>(parameterValues[pIdx], g));
pIdx++;
} else if (node.SubtreeCount == 0) {
// for (latent) variables get the values from variableValues
var varName = node.Symbol.Name;
nodeValues.Add(node, variableValues[varName]);
}
}
}
double h = 1.0 / numericIntegrationSteps;
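// Illustrative example (values assumed): with numericIntegrationSteps = 10 the interval between two rows
// is split into 10 explicit Euler steps of size h = 0.1, i.e. y_{k+1} = y_k + h * f(y_k) for the values
// and g_{k+1} = g_k + h * df/dθ for the gradient vectors (df/dθ includes the dependency of y_k on θ).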
for (int step = 0; step < numericIntegrationSteps; step++) {
var deltaValues = new Dictionary<string, Tuple<double, Vector>>();
for (int i = 0; i < trees.Length; i++) {
var tree = trees[i];
var targetVarName = calculatedVariables[i];
// Root.GetSubtree(0).GetSubtree(0) skips programRoot and startSymbol
var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
deltaValues.Add(targetVarName, res);
}
// update variableValues for next step (explicit Euler integration)
foreach (var kvp in deltaValues) {
var oldVal = variableValues[kvp.Key];
var newVal = Tuple.Create(
oldVal.Item1 + h * kvp.Value.Item1,
oldVal.Item2 + h * kvp.Value.Item2
);
variableValues[kvp.Key] = newVal;
}
foreach (var node in nodeValues.Keys.ToArray()) {
if (node.SubtreeCount == 0 && !IsConstantNode(node)) {
// update values for (latent) variables
var varName = node.Symbol.Name;
nodeValues[node] = variableValues[varName];
}
}
}
}
private static Tuple<double, Vector> InterpretRec(
ISymbolicExpressionTreeNode node,
Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>> nodeValues // contains value and gradient vector for a node (variables and constants only)
) {
switch (node.Symbol.Name) {
case "+": {
var l = InterpretRec(node.GetSubtree(0), nodeValues);
var r = InterpretRec(node.GetSubtree(1), nodeValues);
return Tuple.Create(l.Item1 + r.Item1, l.Item2 + r.Item2);
}
case "*": {
var l = InterpretRec(node.GetSubtree(0), nodeValues);
var r = InterpretRec(node.GetSubtree(1), nodeValues);
return Tuple.Create(l.Item1 * r.Item1, l.Item2 * r.Item1 + l.Item1 * r.Item2);
}
case "-": {
var l = InterpretRec(node.GetSubtree(0), nodeValues);
var r = InterpretRec(node.GetSubtree(1), nodeValues);
return Tuple.Create(l.Item1 - r.Item1, l.Item2 - r.Item2);
}
case "%": {
var l = InterpretRec(node.GetSubtree(0), nodeValues);
var r = InterpretRec(node.GetSubtree(1), nodeValues);
// protected division
if (r.Item1.IsAlmost(0.0)) {
return Tuple.Create(0.0, Vector.Zero);
} else {
return Tuple.Create(
l.Item1 / r.Item1,
l.Item1 * -1.0 / (r.Item1 * r.Item1) * r.Item2 + 1.0 / r.Item1 * l.Item2 // (f/g)' = f * (1/g)' + 1/g * f' = f * -1/g² * g' + 1/g * f'
);
}
}
case "sin": {
var x = InterpretRec(node.GetSubtree(0), nodeValues);
return Tuple.Create(
Math.Sin(x.Item1),
Vector.Cos(x.Item2) * x.Item2
);
}
case "cos": {
var x = InterpretRec(node.GetSubtree(0), nodeValues);
return Tuple.Create(
Math.Cos(x.Item1),
-Vector.Sin(x.Item2) * x.Item2
);
}
case "sqr": {
var x = InterpretRec(node.GetSubtree(0), nodeValues);
return Tuple.Create(
x.Item1 * x.Item1,
2.0 * x.Item1 * x.Item2
);
}
default: {
return nodeValues[node]; // value and gradient for constants and variables must be set by the caller
}
}
}
#endregion
#region events
/*
* Dependencies between parameters:
*
* ProblemData
* |
* V
* TargetVariables FunctionSet MaximumLength NumberOfLatentVariables
* | | | |
* V V | |
* Grammar <---------------+-------------------
* |
* V
* Encoding
*/
private void RegisterEventHandlers() {
ProblemDataParameter.ValueChanged += ProblemDataParameter_ValueChanged;
if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += ProblemData_Changed;
TargetVariablesParameter.ValueChanged += TargetVariablesParameter_ValueChanged;
if (TargetVariablesParameter.Value != null) TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
FunctionSetParameter.ValueChanged += FunctionSetParameter_ValueChanged;
if (FunctionSetParameter.Value != null) FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
MaximumLengthParameter.Value.ValueChanged += MaximumLengthChanged;
NumberOfLatentVariablesParameter.Value.ValueChanged += NumLatentVariablesChanged;
}
private void NumLatentVariablesChanged(object sender, EventArgs e) {
UpdateGrammarAndEncoding();
}
private void MaximumLengthChanged(object sender, EventArgs e) {
UpdateGrammarAndEncoding();
}
private void FunctionSetParameter_ValueChanged(object sender, EventArgs e) {
FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
}
private void CheckedFunctionsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
UpdateGrammarAndEncoding();
}
private void TargetVariablesParameter_ValueChanged(object sender, EventArgs e) {
TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
}
private void CheckedTargetVariablesChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
UpdateGrammarAndEncoding();
}
private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
ProblemDataParameter.Value.Changed += ProblemData_Changed;
OnProblemDataChanged();
OnReset();
}
private void ProblemData_Changed(object sender, EventArgs e) {
OnProblemDataChanged();
OnReset();
}
private void OnProblemDataChanged() {
UpdateTargetVariables(); // implicitly updates other dependent parameters
var handler = ProblemDataChanged;
if (handler != null) handler(this, EventArgs.Empty);
}
#endregion
#region helper
private void InitAllParameters() {
UpdateTargetVariables(); // implicitly updates the grammar and the encoding
}
private ReadOnlyCheckedItemList<StringValue> CreateFunctionSet() {
var l = new CheckedItemList<StringValue>();
l.Add(new StringValue("+").AsReadOnly());
l.Add(new StringValue("*").AsReadOnly());
l.Add(new StringValue("%").AsReadOnly());
l.Add(new StringValue("-").AsReadOnly());
l.Add(new StringValue("sin").AsReadOnly());
l.Add(new StringValue("cos").AsReadOnly());
l.Add(new StringValue("sqr").AsReadOnly());
return l.AsReadOnly();
}
private static bool IsConstantNode(ISymbolicExpressionTreeNode n) {
return n.Symbol.Name.StartsWith("θ");
}
private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) {
return n.Symbol.Name.StartsWith("λ");
}
private static bool IsVariableNode(ISymbolicExpressionTreeNode n) {
return (n.SubtreeCount == 0) && !IsConstantNode(n) && !IsLatentVariableNode(n);
}
private void UpdateTargetVariables() {
var currentlySelectedVariables = TargetVariables.CheckedItems
.OrderBy(i => i.Index)
.Select(i => i.Value.Value)
.ToArray();
var newVariablesList = new CheckedItemList<StringValue>(ProblemData.Dataset.VariableNames.Select(str => new StringValue(str).AsReadOnly()).ToArray()).AsReadOnly();
var matchingItems = newVariablesList.Where(item => currentlySelectedVariables.Contains(item.Value)).ToArray();
foreach (var matchingItem in matchingItems) {
newVariablesList.SetItemCheckedState(matchingItem, true);
}
TargetVariablesParameter.Value = newVariablesList;
}
private void UpdateGrammarAndEncoding() {
var encoding = new MultiEncoding();
var g = CreateGrammar();
foreach (var targetVar in TargetVariables.CheckedItems) {
encoding = encoding.Add(new SymbolicExpressionTreeEncoding(targetVar + "_tree", g, MaximumLength, MaximumLength)); // only limit by length
}
for (int i = 1; i <= NumberOfLatentVariables; i++) {
encoding = encoding.Add(new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength));
}
Encoding = encoding;
}
private ISymbolicExpressionGrammar CreateGrammar() {
// whenever ProblemData is changed we create a new grammar with the necessary symbols
var g = new SimpleSymbolicExpressionGrammar();
g.AddSymbols(FunctionSet.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray(), 2, 2);
// TODO
//g.AddSymbols(new[] {
// "exp",
// "log", // log( ) // TODO: init a theta to ensure the value is always positive
// "exp_minus" // exp((-1) *
//}, 1, 1);
foreach (var variableName in ProblemData.AllowedInputVariables.Union(TargetVariables.CheckedItems.Select(i => i.Value.Value)))
g.AddTerminalSymbol(variableName);
// generate symbols for numeric parameters for which the value is optimized using AutoDiff
// we generate multiple symbols to balance the probability for selecting a numeric parameter in the generation of random trees
var numericConstantsFactor = 2.0;
for (int i = 0; i < numericConstantsFactor * (ProblemData.AllowedInputVariables.Count() + TargetVariables.CheckedItems.Count()); i++) {
g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff
}
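// Hypothetical example: with one allowed input variable and two checked target variables this loop adds
// the terminal symbols θ0 .. θ5 (numericConstantsFactor * (1 + 2) = 6 parameter symbols).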
// generate symbols for latent variables
for (int i = 1; i <= NumberOfLatentVariables; i++) {
g.AddTerminalSymbol("λ" + i); // numeric parameter for which the value is optimized using AutoDiff
}
return g;
}
private ISymbolicExpressionTreeNode TranslateTreeNode(ISymbolicExpressionTreeNode n, double[] parameterValues, ref int nextParIdx) {
ISymbolicExpressionTreeNode translatedNode = null;
if (n.Symbol is StartSymbol) {
translatedNode = new StartSymbol().CreateTreeNode();
} else if (n.Symbol is ProgramRootSymbol) {
translatedNode = new ProgramRootSymbol().CreateTreeNode();
} else if (n.Symbol.Name == "+") {
translatedNode = new Addition().CreateTreeNode();
} else if (n.Symbol.Name == "-") {
translatedNode = new Subtraction().CreateTreeNode();
} else if (n.Symbol.Name == "*") {
translatedNode = new Multiplication().CreateTreeNode();
} else if (n.Symbol.Name == "%") {
translatedNode = new Division().CreateTreeNode();
} else if (n.Symbol.Name == "sin") {
translatedNode = new Sine().CreateTreeNode();
} else if (n.Symbol.Name == "cos") {
translatedNode = new Cosine().CreateTreeNode();
} else if (n.Symbol.Name == "sqr") {
translatedNode = new Square().CreateTreeNode();
} else if (IsConstantNode(n)) {
var constNode = (ConstantTreeNode)new Constant().CreateTreeNode();
constNode.Value = parameterValues[nextParIdx];
nextParIdx++;
translatedNode = constNode;
} else {
// assume a variable name
var varName = n.Symbol.Name;
var varNode = (VariableTreeNode)new Variable().CreateTreeNode();
varNode.Weight = 1.0;
varNode.VariableName = varName;
translatedNode = varNode;
}
foreach (var child in n.Subtrees) {
translatedNode.AddSubtree(TranslateTreeNode(child, parameterValues, ref nextParIdx));
}
return translatedNode;
}
#endregion
#region Import & Export
public void Load(IRegressionProblemData data) {
Name = data.Name;
Description = data.Description;
ProblemData = data;
}
public IRegressionProblemData Export() {
return ProblemData;
}
#endregion
}
}