
source: branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs @ 16399

Last change on this file since 16399 was 16399, checked in by gkronber, 6 years ago

#2925: solution class and solution view

File size: 64.7 KB
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Collections;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.Instances;
using Variable = HeuristicLab.Problems.DataAnalysis.Symbolic.Variable;

namespace HeuristicLab.Problems.DynamicalSystemsModelling {
  // Another option is spline smoothing of the data (over time) to obtain, for each target variable,
  // a denoised value and the derivative dy/dt for all observations.
  // The regression can then be applied directly to dy/dt (with the smoothed y values as inputs).
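  // A hypothetical sketch of this alternative (illustrative names, not part of HeuristicLab):
  //   var smoother = new SplineSmoother(lambda);          // any smoothing-spline implementation
  //   var ySmooth  = smoother.Fit(t, yNoisy);             // denoised y(t) per target variable
  //   var dyDt     = ySmooth.EvaluateDerivative(t);       // dy/dt at all observation times
  //   // then run ordinary symbolic regression with dyDt as target and the smoothed y as inputs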
  [Item("Dynamical Systems Modelling Problem", "TODO")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
  [StorableClass]
  public sealed class Problem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
    #region parameter names
    private const string ProblemDataParameterName = "Data";
    private const string TargetVariablesParameterName = "Target variables";
    private const string FunctionSetParameterName = "Function set";
    private const string MaximumLengthParameterName = "Size limit";
    private const string MaximumParameterOptimizationIterationsParameterName = "Max. parameter optimization iterations";
    private const string NumberOfLatentVariablesParameterName = "Number of latent variables";
    private const string NumericIntegrationStepsParameterName = "Steps for numeric integration";
    private const string TrainingEpisodesParameterName = "Training episodes";
    private const string OptimizeParametersForEpisodesParameterName = "Optimize parameters for episodes";
    private const string OdeSolverParameterName = "ODE Solver";
    #endregion

    #region Parameter Properties
    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }

    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> TargetVariablesParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[TargetVariablesParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> FunctionSetParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[FunctionSetParameterName]; }
    }
    public IFixedValueParameter<IntValue> MaximumLengthParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumLengthParameterName]; }
    }
    public IFixedValueParameter<IntValue> MaximumParameterOptimizationIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumParameterOptimizationIterationsParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumberOfLatentVariablesParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfLatentVariablesParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumericIntegrationStepsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumericIntegrationStepsParameterName]; }
    }
    public IValueParameter<ItemList<IntRange>> TrainingEpisodesParameter {
      get { return (IValueParameter<ItemList<IntRange>>)Parameters[TrainingEpisodesParameterName]; }
    }
    public IFixedValueParameter<BoolValue> OptimizeParametersForEpisodesParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersForEpisodesParameterName]; }
    }
    public IConstrainedValueParameter<StringValue> OdeSolverParameter {
      get { return (IConstrainedValueParameter<StringValue>)Parameters[OdeSolverParameterName]; }
    }
    #endregion

    #region Properties
    public IRegressionProblemData ProblemData {
      get { return ProblemDataParameter.Value; }
      set { ProblemDataParameter.Value = value; }
    }
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }

    public ReadOnlyCheckedItemList<StringValue> TargetVariables {
      get { return TargetVariablesParameter.Value; }
    }

    public ReadOnlyCheckedItemList<StringValue> FunctionSet {
      get { return FunctionSetParameter.Value; }
    }

    public int MaximumLength {
      get { return MaximumLengthParameter.Value.Value; }
    }
    public int MaximumParameterOptimizationIterations {
      get { return MaximumParameterOptimizationIterationsParameter.Value.Value; }
    }
    public int NumberOfLatentVariables {
      get { return NumberOfLatentVariablesParameter.Value.Value; }
    }
    public int NumericIntegrationSteps {
      get { return NumericIntegrationStepsParameter.Value.Value; }
    }
    public IEnumerable<IntRange> TrainingEpisodes {
      get { return TrainingEpisodesParameter.Value; }
    }
    public bool OptimizeParametersForEpisodes {
      get { return OptimizeParametersForEpisodesParameter.Value.Value; }
    }

    public string OdeSolver {
      get { return OdeSolverParameter.Value.Value; }
      set {
        var matchingValue = OdeSolverParameter.ValidValues.FirstOrDefault(v => v.Value == value);
        if (matchingValue == null) throw new ArgumentOutOfRangeException();
        else OdeSolverParameter.Value = matchingValue;
      }
    }

    #endregion

    public event EventHandler ProblemDataChanged;

    public override bool Maximization {
      get { return false; } // we minimize NMSE
    }

    #region item cloning and persistence
    // persistence
    [StorableConstructor]
    private Problem(bool deserializing) : base(deserializing) { }
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(OptimizeParametersForEpisodesParameterName)) {
        Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
      }
      RegisterEventHandlers();
    }

    // cloning
    private Problem(Problem original, Cloner cloner)
      : base(original, cloner) {
      RegisterEventHandlers();
    }
    public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
    #endregion

    public Problem()
      : base() {
      var targetVariables = new CheckedItemList<StringValue>().AsReadOnly(); // HACK: it would be better to provide a new class derived from IDataAnalysisProblem
      var functions = CreateFunctionSet();
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system. Use CSV import functionality to import data.", new RegressionProblemData()));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(TargetVariablesParameterName, "Target variables (overrides setting in ProblemData)", targetVariables));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(FunctionSetParameterName, "The list of allowed functions", functions));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumLengthParameterName, "The maximally allowed length of each expression. Set to a small value (5 - 25). Default = 10", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters (using L-BFGS). More iterations make the algorithm slower, fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(100)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumberOfLatentVariablesParameterName, "Latent variables (unobserved variables) allow us to produce expressions which are integrated up and can be used in other expressions. They are handled similarly to target variables in forward simulation / integration. The difference to target variables is that there are no data to which the calculated values of latent variables are compared. Set to a small value (0 .. 5) as necessary (default = 0)", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps make the algorithm slower, fewer steps worsen the accuracy of the numeric integration scheme.", new IntValue(10)));
      Parameters.Add(new ValueParameter<ItemList<IntRange>>(TrainingEpisodesParameterName, "A list of ranges that should be used for training, each range represents an independent episode. This overrides the TrainingSet parameter in ProblemData.", new ItemList<IntRange>()));
      Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));

      var solversStr = new string[] { "HeuristicLab", "CVODES" };
      var solvers = new ItemSet<StringValue>(
        solversStr.Select(s => new StringValue(s).AsReadOnly())
        );
      Parameters.Add(new ConstrainedValueParameter<StringValue>(OdeSolverParameterName, "The solver to use for solving the initial value ODE problems", solvers, solvers.First()));

      RegisterEventHandlers();
      InitAllParameters();

      // TODO: do not clear selection of target variables when the input variables are changed (keep selected target variables)
      // TODO: UI hangs when selecting / deselecting input variables because the encoding is updated on each item
      // TODO: use training range as default training episode
      // TODO: write back optimized parameters to solution?
      // TODO: optimization of starting values for latent variables in CVODES solver

    }

    public override double Evaluate(Individual individual, IRandom random) {
      var trees = individual.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual
      // write back optimized parameters to tree nodes instead of the separate OptTheta variable
      // retrieve optimized parameters from nodes?

      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
      if (OptimizeParametersForEpisodes) {
        int eIdx = 0;
        double totalNMSE = 0.0;
        int totalSize = 0;
        foreach (var episode in TrainingEpisodes) {
          double[] optTheta;
          double nmse;
          OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, new[] { episode }, MaximumParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver, out optTheta, out nmse);
          individual["OptTheta_" + eIdx] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
          eIdx++;
          totalNMSE += nmse * episode.Size;
          totalSize += episode.Size;
        }
        return totalNMSE / totalSize;
      } else {
        double[] optTheta;
        double nmse;
        OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, TrainingEpisodes, MaximumParameterOptimizationIterations, NumericIntegrationSteps, OdeSolver, out optTheta, out nmse);
        individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
        return nmse;
      }
    }
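
    // With per-episode optimization the overall quality above is the size-weighted mean of
    // episode NMSEs:  nmse_total = Σ_e (nmse_e · |e|) / Σ_e |e|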

    public static void OptimizeForEpisodes(
      ISymbolicExpressionTree[] trees,
      IRegressionProblemData problemData,
      string[] targetVars,
      string[] latentVariables,
      IRandom random,
      IEnumerable<IntRange> episodes,
      int maxParameterOptIterations,
      int numericIntegrationSteps,
      string odeSolver,
      out double[] optTheta,
      out double nmse) {
      var rows = episodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start)).ToArray();
      var targetValues = new double[rows.Length, targetVars.Length];

      // collect values of all target variables
      var colIdx = 0;
      foreach (var targetVar in targetVars) {
        int rowIdx = 0;
        foreach (var value in problemData.Dataset.GetDoubleValues(targetVar, rows)) {
          targetValues[rowIdx, colIdx] = value;
          rowIdx++;
        }
        colIdx++;
      }

      // NOTE: the order of values in the parameter vector matches the prefix order of constant nodes in trees
      var paramNodes = new List<ISymbolicExpressionTreeNode>();
      foreach (var t in trees) {
        foreach (var n in t.IterateNodesPrefix()) {
          if (IsConstantNode(n))
            paramNodes.Add(n);
        }
      }
      // init params randomly
      // theta contains parameter values for trees and then the initial values for latent variables (a separate vector for each episode)
      // initial values for latent variables are also optimized
      var theta = new double[paramNodes.Count + latentVariables.Length * episodes.Count()];
      for (int i = 0; i < theta.Length; i++)
        theta[i] = random.NextDouble() * 2.0 - 1.0;

      optTheta = new double[0];
      if (theta.Length > 0) {
        alglib.minlbfgsstate state;
        alglib.minlbfgsreport report;
        alglib.minlbfgscreate(Math.Min(theta.Length, 5), theta, out state);
        alglib.minlbfgssetcond(state, 0.0, 0.0, 0.0, maxParameterOptIterations);
        //alglib.minlbfgssetgradientcheck(state, 1e-6);
        alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null,
          new object[] { trees, targetVars, problemData, targetValues, episodes.ToArray(), numericIntegrationSteps, latentVariables, odeSolver }); //TODO: create a type

        alglib.minlbfgsresults(state, out optTheta, out report);

        /*
         *
         *         L-BFGS algorithm results

          INPUT PARAMETERS:
              State   -   algorithm state

          OUTPUT PARAMETERS:
              X       -   array[0..N-1], solution
              Rep     -   optimization report:
                          * Rep.TerminationType completion code:
                              * -7    gradient verification failed.
                                      See MinLBFGSSetGradientCheck() for more information.
                              * -2    rounding errors prevent further improvement.
                                      X contains best point found.
                              * -1    incorrect parameters were specified
                              *  1    relative function improvement is no more than
                                      EpsF.
                              *  2    relative step is no more than EpsX.
                              *  4    gradient norm is no more than EpsG
                              *  5    MaxIts steps was taken
                              *  7    stopping conditions are too stringent,
                                      further improvement is impossible
                          * Rep.IterationsCount contains iterations count
                          * NFEV contains number of function calculations
         */
        if (report.terminationtype < 0) { nmse = 10E6; return; }
      }

      // perform evaluation for optimal theta to get quality value
      double[] grad = new double[optTheta.Length];
      nmse = double.NaN;
      EvaluateObjectiveAndGradient(optTheta, ref nmse, grad,
        new object[] { trees, targetVars, problemData, targetValues, episodes.ToArray(), numericIntegrationSteps, latentVariables, odeSolver });
      if (double.IsNaN(nmse) || double.IsInfinity(nmse)) { nmse = 10E6; return; } // return a large value (TODO: be consistent by using NMSE)
    }
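
    // Illustrative usage (a sketch; the concrete tree, data, and values are hypothetical,
    // MersenneTwister is HeuristicLab's IRandom implementation):
    //   double[] theta; double nmse;
    //   Problem.OptimizeForEpisodes(new[] { tree }, problemData, new[] { "y" }, new string[0],
    //     new MersenneTwister(1234), new[] { new IntRange(0, 100) }, 100, 10, "HeuristicLab",
    //     out theta, out nmse);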

    private static void EvaluateObjectiveAndGradient(double[] x, ref double f, double[] grad, object obj) {
      var trees = (ISymbolicExpressionTree[])((object[])obj)[0];
      var targetVariables = (string[])((object[])obj)[1];
      var problemData = (IRegressionProblemData)((object[])obj)[2];
      var targetValues = (double[,])((object[])obj)[3];
      var episodes = (IntRange[])((object[])obj)[4];
      var numericIntegrationSteps = (int)((object[])obj)[5];
      var latentVariables = (string[])((object[])obj)[6];
      var odeSolver = (string)((object[])obj)[7];


      Tuple<double, Vector>[][] predicted = null; // one array per row: predicted values (and gradients) for all target and latent variables
      // TODO: stop integration early for diverging solutions
      predicted = Integrate(
          trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
          problemData.Dataset,
          problemData.AllowedInputVariables.ToArray(),
          targetVariables,
          latentVariables,
          episodes,
          x,
          odeSolver,
          numericIntegrationSteps).ToArray();

      if (predicted.Length != targetValues.GetLength(0)) {
        f = double.MaxValue;
        Array.Clear(grad, 0, grad.Length);
        return;
      }

      // for normalized MSE = 1/variance(t) * MSE(t, pred)
      // TODO: Perf. (by standardization of target variables before evaluation of all trees)
      var invVar = Enumerable.Range(0, targetVariables.Length)
        .Select(c => Enumerable.Range(0, targetValues.GetLength(0)).Select(row => targetValues[row, c])) // column vectors
        .Select(vec => vec.Variance())
        .Select(v => 1.0 / v)
        .ToArray();

      // objective function is NMSE
      f = 0.0;
      int n = predicted.Length;
      double invN = 1.0 / n;
      var g = Vector.Zero;
      int r = 0;
      foreach (var y_pred in predicted) {
        // y_pred contains the predicted values for target variables first and then predicted values for latent variables
        for (int c = 0; c < targetVariables.Length; c++) {

          var y_pred_f = y_pred[c].Item1;
          var y = targetValues[r, c];

          var res = (y - y_pred_f);
          var ressq = res * res;
          f += ressq * invN * invVar[c];
          g += -2.0 * res * y_pred[c].Item2 * invN * invVar[c];
        }
        r++;
      }

      g.CopyTo(grad);
    }
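
    // In formulas (as implemented above): with residual e_rc = y_rc − ŷ_rc,
    //   f(θ)  = Σ_r Σ_c e_rc² / (n · Var(y_c))
    //   ∇f(θ) = Σ_r Σ_c −2 · e_rc · ∂ŷ_rc/∂θ / (n · Var(y_c))
    // where ∂ŷ_rc/∂θ is the gradient vector carried alongside each predicted value.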

    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
      base.Analyze(individuals, qualities, results, random);

      if (!results.ContainsKey("Prediction (training)")) {
        results.Add(new Result("Prediction (training)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Prediction (test)")) {
        results.Add(new Result("Prediction (test)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Models")) {
        results.Add(new Result("Models", typeof(VariableCollection)));
      }
      if (!results.ContainsKey("SNMSE")) {
        results.Add(new Result("SNMSE", typeof(DoubleValue)));
      }
      if (!results.ContainsKey("Solution")) {
        results.Add(new Result("Solution", typeof(Solution)));
      }

      var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
      var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual

      results["SNMSE"].Value = new DoubleValue(bestIndividualAndQuality.Item2);

      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees

      var trainingList = new ItemList<DataTable>();

      if (OptimizeParametersForEpisodes) {
        var eIdx = 0;
        var trainingPredictions = new List<Tuple<double, Vector>[][]>();
        foreach (var episode in TrainingEpisodes) {
          var episodes = new[] { episode };
          var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta_" + eIdx]).ToArray(); // see evaluate
          var trainingPrediction = Integrate(
                                   trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
                                   problemData.Dataset,
                                   problemData.AllowedInputVariables.ToArray(),
                                   targetVars,
                                   latentVariables,
                                   episodes,
                                   optTheta,
                                   OdeSolver,
                                   NumericIntegrationSteps).ToArray();
          trainingPredictions.Add(trainingPrediction);
          eIdx++;
        }

        // only for target values
        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
        for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
          var targetVar = targetVars[colIdx];
          var trainingDataTable = new DataTable(targetVar + " prediction (training)");
          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPredictions.SelectMany(arr => arr.Select(row => row[colIdx].Item1)).ToArray());
          trainingDataTable.Rows.Add(actualValuesRow);
          trainingDataTable.Rows.Add(predictedValuesRow);
          trainingList.Add(trainingDataTable);
        }
        results["Prediction (training)"].Value = trainingList.AsReadOnly();


        var models = new VariableCollection();

        foreach (var tup in targetVars.Zip(trees, Tuple.Create)) {
          var targetVarName = tup.Item1;
          var tree = tup.Item2;

          var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
        }
        results["Models"].Value = models;
      } else {
        var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta"]).ToArray(); // see evaluate
        var trainingPrediction = Integrate(
                                   trees,  // we assume trees contain expressions for the change of each target variable over time dy/dt
                                   problemData.Dataset,
                                   problemData.AllowedInputVariables.ToArray(),
                                   targetVars,
                                   latentVariables,
                                   TrainingEpisodes,
                                   optTheta,
                                   OdeSolver,
                                   NumericIntegrationSteps).ToArray();
        // for target values and latent variables
        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
        for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
          // is target variable
          if (colIdx < targetVars.Length) {
            var targetVar = targetVars[colIdx];
            var trainingDataTable = new DataTable(targetVar + " prediction (training)");
            var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
            var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            trainingDataTable.Rows.Add(actualValuesRow);
            trainingDataTable.Rows.Add(predictedValuesRow);
            trainingList.Add(trainingDataTable);
          } else {
            var latentVar = latentVariables[colIdx - targetVars.Length];
            var trainingDataTable = new DataTable(latentVar + " prediction (training)");
            var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            var emptyRow = new DataRow(latentVar);
            trainingDataTable.Rows.Add(emptyRow);
            trainingDataTable.Rows.Add(predictedValuesRow);
            trainingList.Add(trainingDataTable);
          }
        }
        // TODO: DRY for training and test
        var testList = new ItemList<DataTable>();
        var testRows = ProblemData.TestIndices.ToArray();
        var testPrediction = Integrate(
         trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
         problemData.Dataset,
         problemData.AllowedInputVariables.ToArray(),
         targetVars,
         latentVariables,
         new IntRange[] { ProblemData.TestPartition },
         optTheta,
         OdeSolver,
         NumericIntegrationSteps).ToArray();

        for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
          // is target variable
          if (colIdx < targetVars.Length) {
            var targetVar = targetVars[colIdx];
            var testDataTable = new DataTable(targetVar + " prediction (test)");
            var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
            var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            testDataTable.Rows.Add(actualValuesRow);
            testDataTable.Rows.Add(predictedValuesRow);
            testList.Add(testDataTable);

          } else {
            var latentVar = latentVariables[colIdx - targetVars.Length];
            var testDataTable = new DataTable(latentVar + " prediction (test)");
            var predictedValuesRow = new DataRow(latentVar + " pred.", "Predicted values for " + latentVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
            var emptyRow = new DataRow(latentVar);
            testDataTable.Rows.Add(emptyRow);
            testDataTable.Rows.Add(predictedValuesRow);
            testList.Add(testDataTable);
          }
        }

        results["Prediction (training)"].Value = trainingList.AsReadOnly();
        results["Prediction (test)"].Value = testList.AsReadOnly();


        #region simplification of models
        // TODO the dependency of HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal
        var models = new VariableCollection();    // to store target var names and original version of tree

        var optimizedTrees = new List<ISymbolicExpressionTree>();
        int nextParIdx = 0;
        for (int idx = 0; idx < trees.Length; idx++) {
          var tree = trees[idx];
          optimizedTrees.Add(new SymbolicExpressionTree(FixParameters(tree.Root, optTheta.ToArray(), ref nextParIdx)));
        }
        var ds = problemData.Dataset;
        var newVarNames = Enumerable.Range(0, nextParIdx).Select(i => "c_" + i).ToArray();
        var allVarNames = ds.DoubleVariables.Concat(newVarNames);
        var newVarValues = Enumerable.Range(0, nextParIdx).Select(i => "c_" + i).ToArray();
        var allVarValues = ds.DoubleVariables.Select(varName => ds.GetDoubleValues(varName).ToList())
          .Concat(Enumerable.Range(0, nextParIdx).Select(i => Enumerable.Repeat(optTheta[i], ds.Rows).ToList()))
          .ToList();
        var newDs = new Dataset(allVarNames, allVarValues);
        var newProblemData = new RegressionProblemData(newDs, problemData.AllowedInputVariables.Concat(newVarValues).ToArray(), problemData.TargetVariable);
        results["Solution"].Value = new Solution(optimizedTrees.ToArray(),
                   // optTheta,
                   newProblemData,
                   targetVars,
                   latentVariables,
                   TrainingEpisodes,
                   OdeSolver,
                   NumericIntegrationSteps);


        nextParIdx = 0;
        for (int idx = 0; idx < trees.Length; idx++) {
          var varName = string.Empty;
          if (idx < targetVars.Length) {
            varName = targetVars[idx];
          } else {
            varName = latentVariables[idx - targetVars.Length];
          }
          var tree = trees[idx];

          // when we reference HeuristicLab.Problems.DataAnalysis.Symbolic we can translate symbols
          var shownTree = new SymbolicExpressionTree(TranslateTreeNode(tree.Root, optTheta.ToArray(),
            ref nextParIdx));

          // var shownTree = (SymbolicExpressionTree)tree.Clone();
          // var constantsNodeOrig = tree.IterateNodesPrefix().Where(IsConstantNode);
          // var constantsNodeShown = shownTree.IterateNodesPrefix().Where(IsConstantNode);
          //
          // foreach (var n in constantsNodeOrig.Zip(constantsNodeShown, (original, shown) => new { original, shown })) {
          //   double constantsVal = optTheta[nodeIdx[n.original]];
          //
          //   ConstantTreeNode replacementNode = new ConstantTreeNode(new Constant()) { Value = constantsVal };
          //
          //   var parentNode = n.shown.Parent;
          //   int replacementIndex = parentNode.IndexOfSubtree(n.shown);
          //   parentNode.RemoveSubtree(replacementIndex);
          //   parentNode.InsertSubtree(replacementIndex, replacementNode);
          // }

          var origTreeVar = new HeuristicLab.Core.Variable(varName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
          var simplifiedTreeVar = new HeuristicLab.Core.Variable(varName + "(simplified)");
          simplifiedTreeVar.Value = TreeSimplifier.Simplify(shownTree);
          models.Add(simplifiedTreeVar);

        }

        results["Models"].Value = models;
        #endregion
      }
    }


    #region interpretation

    // the following uses auto-diff to calculate the gradient w.r.t. the parameters forward in time.
    // this is basically the method described in Gronwall T. Note on the derivatives with respect to a parameter of the solutions of a system of differential equations. Ann. Math. 1919;20:292–296.

    // a comparison of three potential calculation methods for the gradient is given in:
    // Sengupta, B., Friston, K. J., & Penny, W. D. (2014). Efficient gradient computation for dynamical models. Neuroimage, 98(100), 521–527. http://doi.org/10.1016/j.neuroimage.2014.04.040
    // "Our comparison establishes that the adjoint method is computationally more efficient for numerical estimation of parametric gradients
    // for state-space models — both linear and non-linear, as in the case of a dynamical causal model (DCM)"

    // for a solver with the necessary features see: https://computation.llnl.gov/projects/sundials/cvodes
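
    // Sketch of the forward-sensitivity idea used below: for parameters θ the sensitivities
    // s_i(t) = ∂y(t)/∂θ_i satisfy their own ODE, obtained by differentiating y' = f(y, θ):
    //
    //   s_i'(t) = (∂f/∂y)·s_i(t) + ∂f/∂θ_i,   with s_i(t0) = ∂y(t0)/∂θ_i
    //
    // Integrating y(t) and all s_i(t) forward together yields predictions and gradients in one pass.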

    public static IEnumerable<Tuple<double, Vector>[]> Integrate(
      ISymbolicExpressionTree[] trees, IDataset dataset,
      string[] inputVariables, string[] targetVariables, string[] latentVariables, IEnumerable<IntRange> episodes,
      double[] parameterValues,
      string odeSolver, int numericIntegrationSteps = 100) {

      // TODO: numericIntegrationSteps is only relevant for the HeuristicLab solver
      var episodeIdx = 0;
      foreach (var episode in episodes) {
        var rows = Enumerable.Range(episode.Start, episode.End - episode.Start);

        // integrate forward starting with known values for the target in t0

        var variableValues = new Dictionary<string, Tuple<double, Vector>>();
        var t0 = rows.First();
        foreach (var varName in inputVariables) {
          variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
        }
        foreach (var varName in targetVariables) {
          variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
        }
        // add value entries for latent variables which are also integrated
        // initial values are at the end of the parameter vector
        // separate initial values for each episode
        var initialValueIdx = parameterValues.Length - episodes.Count() * latentVariables.Length + episodeIdx * latentVariables.Length;
        foreach (var latentVar in latentVariables) {
          var arr = new double[parameterValues.Length]; // backing array
          arr[initialValueIdx] = 1.0;
          var g = new Vector(arr);
          variableValues.Add(latentVar,
            Tuple.Create(parameterValues[initialValueIdx], g)); // we don't have observations for latent variables therefore we optimize the initial value for each episode
          initialValueIdx++;
        }

        var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must coincide with the order of trees in the encoding

        // return first value as stored in the dataset
        yield return calculatedVariables
          .Select(calcVarName => variableValues[calcVarName])
          .ToArray();

        var prevT = rows.First(); // TODO: here we should use a variable for t if it is available. Right now we assume equidistant measurements.
        foreach (var t in rows.Skip(1)) {
          if (odeSolver == "HeuristicLab")
            IntegrateHL(trees, calculatedVariables, variableValues, parameterValues, numericIntegrationSteps);
          else if (odeSolver == "CVODES")
            IntegrateCVODES(trees, calculatedVariables, variableValues, parameterValues, t - prevT);
          else throw new InvalidOperationException("Unknown ODE solver " + odeSolver);
          prevT = t;

          // This check doesn't work with the HeuristicLab integrator if there are input variables
          //if (variableValues.Count == targetVariables.Length) {
          // only return the target variables for calculation of errors
          var res = calculatedVariables
            .Select(targetVar => variableValues[targetVar])
            .ToArray();
          if (res.Any(ri => double.IsNaN(ri.Item1) || double.IsInfinity(ri.Item1))) yield break;
          yield return res;
          //} else {
          //  yield break; // stop early on problems
          //}


          // update for next time step
          foreach (var varName in inputVariables) {
            variableValues[varName] = Tuple.Create(dataset.GetDoubleValue(varName, t), Vector.Zero);
          }
        }
        episodeIdx++;
      }
    }
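
    // Illustrative call (a sketch; trees, ds, inputVars, and theta are hypothetical locals):
    //   var pred = Integrate(trees, ds, inputVars, new[] { "y" }, new string[0],
    //                        new[] { new IntRange(0, 100) }, theta, "HeuristicLab", 10).ToArray();
    //   // pred[r][c] holds (value, gradient w.r.t. theta) of calculated variable c at row r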

    #region CVODES


    /// <summary>
    ///  Here we use CVODES to solve the ODE. Forward sensitivities are used to calculate the gradient for parameter optimization
    /// </summary>
    /// <param name="trees">Each equation in the ODE represented as a tree</param>
    /// <param name="calculatedVariables">The names of the calculated variables</param>
    /// <param name="variableValues">The start values of the calculated variables as well as their sensitivities over parameters</param>
    /// <param name="parameterValues">The current parameter values</param>
    /// <param name="t">The time t up to which we need to integrate.</param>
    private static void IntegrateCVODES(
      ISymbolicExpressionTree[] trees, // f(y,p) in tree representation
      string[] calculatedVariables, // names of elements of y
      Dictionary<string, Tuple<double, Vector>> variableValues,  // y (input and output) input: y(t0), output: y(t0+t)
      double[] parameterValues, // p
      double t // duration t for which we want to integrate
      ) {

      // the RHS of the ODE
      // dy/dt = f(y_t,x_t,p)
      CVODES.CVRhsFunc f = CreateOdeRhs(trees, calculatedVariables, parameterValues);
      // the Jacobian ∂f/∂y
      CVODES.CVDlsJacFunc jac = CreateJac(trees, calculatedVariables, parameterValues);

      // the RHS for the forward sensitivities (∂f/∂y)s_i(t) + ∂f/∂p_i
      CVODES.CVSensRhsFn sensF = CreateSensitivityRhs(trees, calculatedVariables, parameterValues);

      // setup solver
      int numberOfEquations = trees.Length;
      IntPtr y = IntPtr.Zero;
      IntPtr cvode_mem = IntPtr.Zero;
      IntPtr A = IntPtr.Zero;
      IntPtr yS0 = IntPtr.Zero;
      IntPtr linearSolver = IntPtr.Zero;
      var ns = parameterValues.Length; // number of parameters

      try {
        y = CVODES.N_VNew_Serial(numberOfEquations);
        // init y to current values of variables
        // y must be initialized before calling CVodeInit
        for (int i = 0; i < calculatedVariables.Length; i++) {
          CVODES.NV_Set_Ith_S(y, i, variableValues[calculatedVariables[i]].Item1);
        }

        cvode_mem = CVODES.CVodeCreate(CVODES.MultistepMethod.CV_ADAMS, CVODES.NonlinearSolverIteration.CV_FUNCTIONAL);

        var flag = CVODES.CVodeInit(cvode_mem, f, 0.0, y);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        double relTol = 1.0e-2;
        double absTol = 1.0;
        flag = CVODES.CVodeSStolerances(cvode_mem, relTol, absTol);  // TODO: probably need to adjust absTol per variable
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        A = CVODES.SUNDenseMatrix(numberOfEquations, numberOfEquations);
        Debug.Assert(A != IntPtr.Zero);

        linearSolver = CVODES.SUNDenseLinearSolver(y, A);
        Debug.Assert(linearSolver != IntPtr.Zero);

        flag = CVODES.CVDlsSetLinearSolver(cvode_mem, linearSolver, A);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        flag = CVODES.CVDlsSetJacFn(cvode_mem, jac);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        yS0 = CVODES.N_VCloneVectorArray_Serial(ns, y); // clone the output vector for each parameter
        unsafe {
          // set to initial sensitivities supplied by caller
          for (int pIdx = 0; pIdx < ns; pIdx++) {
            var yS0_i = *((IntPtr*)yS0.ToPointer() + pIdx);
            for (var varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
              CVODES.NV_Set_Ith_S(yS0_i, varIdx, variableValues[calculatedVariables[varIdx]].Item2[pIdx]);
            }
          }
        }

        flag = CVODES.CVodeSensInit(cvode_mem, ns, CVODES.CV_SIMULTANEOUS, sensF, yS0);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        flag = CVODES.CVodeSensEEtolerances(cvode_mem);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        // make one forward integration step
        double tout = 0.0; // first output time
        flag = CVODES.CVode(cvode_mem, t, y, ref tout, CVODES.CV_NORMAL);
        if (flag == CVODES.CV_SUCCESS) {
          Debug.Assert(t == tout);

          // get sensitivities
          flag = CVODES.CVodeGetSens(cvode_mem, ref tout, yS0);
          Debug.Assert(CVODES.CV_SUCCESS == flag);

          // update variableValues based on integration results
          for (int varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
            var yi = CVODES.NV_Get_Ith_S(y, varIdx);
            var gArr = new double[parameterValues.Length];
            for (var pIdx = 0; pIdx < parameterValues.Length; pIdx++) {
              unsafe {
                var yS0_pi = *((IntPtr*)yS0.ToPointer() + pIdx);
                gArr[pIdx] = CVODES.NV_Get_Ith_S(yS0_pi, varIdx);
              }
            }
            variableValues[calculatedVariables[varIdx]] = Tuple.Create(yi, new Vector(gArr));
          }
        } else {
          variableValues.Clear();   // indicate problems by not returning new values
        }

        // cleanup all allocated objects
      } finally {
        if (y != IntPtr.Zero) CVODES.N_VDestroy_Serial(y);
        if (cvode_mem != IntPtr.Zero) CVODES.CVodeFree(ref cvode_mem);
        if (linearSolver != IntPtr.Zero) CVODES.SUNLinSolFree(linearSolver);
        if (A != IntPtr.Zero) CVODES.SUNMatDestroy(A);
        if (yS0 != IntPtr.Zero) CVODES.N_VDestroyVectorArray_Serial(yS0, ns);
      }
    }


    private static CVODES.CVRhsFunc CreateOdeRhs(
      ISymbolicExpressionTree[] trees,
      string[] calculatedVariables,
      double[] parameterValues) {
      // we don't need to calculate a gradient here
      return (double t,
              IntPtr y, // N_Vector, current value of y (input)
              IntPtr ydot, // N_Vector, calculated value of y' (output)
              IntPtr user_data // optional user data, (unused here)
              ) => {
                // TODO: perf
                var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();

                int pIdx = 0;
                foreach (var tree in trees) {
                  foreach (var n in tree.IterateNodesPrefix()) {
                    if (IsConstantNode(n)) {
                      nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we do not need a gradient
                      pIdx++;
                    } else if (n.SubtreeCount == 0) {
                      // for variables and latent variables get the value from variableValues
                      var varName = n.Symbol.Name;
                      var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
                      if (varIdx < 0) throw new InvalidProgramException();
                      var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
                      nodeValues.Add(n, Tuple.Create(y_i, Vector.Zero)); // no gradient needed
                    }
                  }
                }
                for (int i = 0; i < trees.Length; i++) {
                  var tree = trees[i];
                  var res_i = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
                  CVODES.NV_Set_Ith_S(ydot, i, res_i.Item1);
                }
                return 0;
              };
    }

    private static CVODES.CVDlsJacFunc CreateJac(
      ISymbolicExpressionTree[] trees,
      string[] calculatedVariables,
      double[] parameterValues) {

      return (
        double t, // current time (input)
        IntPtr y, // N_Vector, current value of y (input)
        IntPtr fy, // N_Vector, current value of f (input)
        IntPtr Jac, // SUNMatrix ∂f/∂y (output, row i contains the ∂f_i/∂y vector)
        IntPtr user_data, // optional (unused here)
        IntPtr tmp1, // N_Vector, optional (unused here)
        IntPtr tmp2, // N_Vector, optional (unused here)
        IntPtr tmp3 // N_Vector, optional (unused here)
      ) => {
        // here we need to calculate partial derivatives for the calculated variables y
        var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
        int pIdx = 0;
        foreach (var tree in trees) {
          foreach (var n in tree.IterateNodesPrefix()) {
            if (IsConstantNode(n)) {
              nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we need a gradient over y which is zero for parameters
              pIdx++;
            } else if (n.SubtreeCount == 0) {
              // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
              var varName = n.Symbol.Name;
              var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
              if (varIdx < 0) throw new InvalidProgramException();

              var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
              var gArr = new double[CVODES.NV_LENGTH_S(y)]; // backing array
              gArr[varIdx] = 1.0;
              var g = new Vector(gArr);
              nodeValues.Add(n, Tuple.Create(y_i, g));
            }
          }
        }

        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];
          var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
          var g = res.Item2;
          for (int j = 0; j < calculatedVariables.Length; j++) {
            CVODES.SUNDenseMatrix_Set(Jac, i, j, g[j]);
          }
        }
        return 0; // on success
      };
    }


    // to calculate sensitivities RHS for all equations at once
    // must compute (∂f/∂y)s_i(t) + ∂f/∂p_i and store in ySdot.
    // Index i refers to parameters, dimensionality of matrix and vectors is number of equations
    private static CVODES.CVSensRhsFn CreateSensitivityRhs(ISymbolicExpressionTree[] trees, string[] calculatedVariables, double[] parameterValues) {
      return (
              int Ns, // number of parameters
              double t, // current time
              IntPtr y, // N_Vector y(t) (input)
              IntPtr ydot, // N_Vector dy/dt(t) (input)
              IntPtr yS, // N_Vector*, one vector for each parameter (input)
              IntPtr ySdot, // N_Vector*, one vector for each parameter (output)
              IntPtr user_data, // optional (unused here)
              IntPtr tmp1, // N_Vector, optional (unused here)
              IntPtr tmp2 // N_Vector, optional (unused here)
        ) => {
          // here we need to calculate partial derivatives for the calculated variables y as well as for the parameters
          var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
          var d = calculatedVariables.Length + parameterValues.Length; // dimensionality of gradient
          // first collect variable values
          foreach (var tree in trees) {
            foreach (var n in tree.IterateNodesPrefix()) {
              if (IsVariableNode(n)) {
                // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
                var varName = n.Symbol.Name;
                var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
                if (varIdx < 0) throw new InvalidProgramException();

                var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
                var gArr = new double[d]; // backing array
                gArr[varIdx] = 1.0;
                var g = new Vector(gArr);
                nodeValues.Add(n, Tuple.Create(y_i, g));
              }
            }
          }
          // then collect constants
          int pIdx = 0;
          foreach (var tree in trees) {
            foreach (var n in tree.IterateNodesPrefix()) {
              if (IsConstantNode(n)) {
                var gArr = new double[d];
                gArr[calculatedVariables.Length + pIdx] = 1.0;
                var g = new Vector(gArr);
                nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], g));
                pIdx++;
              }
            }
          }
          // gradient vector is [∂f/∂y_1, ∂f/∂y_2, ... ∂f/∂yN, ∂f/∂p_1 ... ∂f/∂p_K]


          for (pIdx = 0; pIdx < Ns; pIdx++) {
            unsafe {
              var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
              CVODES.N_VConst_Serial(0.0, sDot_pi);
            }
          }

          for (int i = 0; i < trees.Length; i++) {
            var tree = trees[i];
            var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
            var g = res.Item2;


            // update ySdot = (∂f/∂y)s_i(t) + ∂f/∂p_i

            for (pIdx = 0; pIdx < Ns; pIdx++) {
              unsafe {
                var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
                var s_pi = *((IntPtr*)yS.ToPointer() + pIdx);

                var v = CVODES.NV_Get_Ith_S(sDot_pi, i);
                // (∂f/∂y)s_i(t)
                var p = 0.0;
                for (int yIdx = 0; yIdx < calculatedVariables.Length; yIdx++) {
                  p += g[yIdx] * CVODES.NV_Get_Ith_S(s_pi, yIdx);
                }
                // + ∂f/∂p_i
                CVODES.NV_Set_Ith_S(sDot_pi, i, v + p + g[calculatedVariables.Length + pIdx]);
              }
            }

          }
          return 0; // on success
        };
    }
    #endregion

    private static void IntegrateHL(
      ISymbolicExpressionTree[] trees,
      string[] calculatedVariables, // names of integrated variables
      Dictionary<string, Tuple<double, Vector>> variableValues, // y (input and output) input: y(t0), output: y(t0+1)
      double[] parameterValues,
      int numericIntegrationSteps) {

      var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();

      // the nodeValues for parameters are constant over time
      // TODO: this needs to be done only once for each iteration in gradient descent (whenever parameter values change)
      // NOTE: the order must be the same as above (prefix order for nodes)
      int pIdx = 0;
      foreach (var tree in trees) {
        foreach (var node in tree.Root.IterateNodesPrefix()) {
          if (IsConstantNode(node)) {
            var gArr = new double[parameterValues.Length]; // backing array
            gArr[pIdx] = 1.0;
            var g = new Vector(gArr);
            nodeValues.Add(node, new Tuple<double, Vector>(parameterValues[pIdx], g));
            pIdx++;
          } else if (node.SubtreeCount == 0) {
            // for (latent) variables get the values from variableValues
            var varName = node.Symbol.Name;
            nodeValues.Add(node, variableValues[varName]);
          }
        }
      }


      double h = 1.0 / numericIntegrationSteps;
      for (int step = 0; step < numericIntegrationSteps; step++) {
        var deltaValues = new Dictionary<string, Tuple<double, Vector>>();
        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];
          var targetVarName = calculatedVariables[i];

          // Root.GetSubtree(0).GetSubtree(0) skips programRoot and startSymbol
          var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
          deltaValues.Add(targetVarName, res);
        }

        // update variableValues for next step (explicit Euler integration; value and gradient advance together)
        foreach (var kvp in deltaValues) {
          var oldVal = variableValues[kvp.Key];
          var newVal = Tuple.Create(
            oldVal.Item1 + h * kvp.Value.Item1,
            oldVal.Item2 + h * kvp.Value.Item2
          );
          variableValues[kvp.Key] = newVal;
        }


        foreach (var node in nodeValues.Keys.ToArray()) {
          if (node.SubtreeCount == 0 && !IsConstantNode(node)) {
            // update values for (latent) variables
            var varName = node.Symbol.Name;
            nodeValues[node] = variableValues[varName];
          }
        }
      }
    }
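
    // Each of the numericIntegrationSteps sub-steps above is an explicit Euler update with
    // step size h = 1 / numericIntegrationSteps; value and gradient are advanced together:
    //
    //   y_{k+1}       = y_k       + h · f(y_k, θ)
    //   (∂y/∂θ)_{k+1} = (∂y/∂θ)_k + h · (df/dθ)(y_k, θ)   // total derivative, as computed by InterpretRec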

    private static Tuple<double, Vector> InterpretRec(
      ISymbolicExpressionTreeNode node,
      Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>> nodeValues      // contains value and gradient vector for a node (variables and constants only)
        ) {

      switch (node.Symbol.Name) {
        case "+": {
            var l = InterpretRec(node.GetSubtree(0), nodeValues);
            var r = InterpretRec(node.GetSubtree(1), nodeValues);

            return Tuple.Create(l.Item1 + r.Item1, l.Item2 + r.Item2);
          }
        case "*": {
            var l = InterpretRec(node.GetSubtree(0), nodeValues);
            var r = InterpretRec(node.GetSubtree(1), nodeValues);

            return Tuple.Create(l.Item1 * r.Item1, l.Item2 * r.Item1 + l.Item1 * r.Item2);
          }

        case "-": {
            var l = InterpretRec(node.GetSubtree(0), nodeValues);
            var r = InterpretRec(node.GetSubtree(1), nodeValues);

            return Tuple.Create(l.Item1 - r.Item1, l.Item2 - r.Item2);
          }
        case "%": {
            var l = InterpretRec(node.GetSubtree(0), nodeValues);
            var r = InterpretRec(node.GetSubtree(1), nodeValues);

            // protected division
            if (r.Item1.IsAlmost(0.0)) {
              return Tuple.Create(0.0, Vector.Zero);
            } else {
              return Tuple.Create(
                l.Item1 / r.Item1,
                l.Item1 * -1.0 / (r.Item1 * r.Item1) * r.Item2 + 1.0 / r.Item1 * l.Item2 // (f/g)' = f * (1/g)' + 1/g * f' = f * -1/g² * g' + 1/g * f'
                );
            }
          }
        case "sin": {
            var x = InterpretRec(node.GetSubtree(0), nodeValues);
            return Tuple.Create(
              Math.Sin(x.Item1),
              Math.Cos(x.Item1) * x.Item2 // chain rule: sin(f)' = cos(f) * f'
            );
          }
        case "cos": {
            var x = InterpretRec(node.GetSubtree(0), nodeValues);
            return Tuple.Create(
              Math.Cos(x.Item1),
              -Math.Sin(x.Item1) * x.Item2 // chain rule: cos(f)' = -sin(f) * f'
            );
          }
        case "sqr": {
            var x = InterpretRec(node.GetSubtree(0), nodeValues);
            return Tuple.Create(
              x.Item1 * x.Item1,
              2.0 * x.Item1 * x.Item2
            );
          }
        default: {
            return nodeValues[node]; // value and gradient for constants and variables must be set by the caller
          }
      }
    }
    #endregion

    #region events
    /*
     * Dependencies between parameters:
     *
     * ProblemData
     *    |
     *    V
     * TargetVariables   FunctionSet    MaximumLength    NumberOfLatentVariables
     *               |   |                 |                   |
     *               V   V                 |                   |
     *             Grammar <---------------+-------------------
     *                |
     *                V
     *            Encoding
     */
    private void RegisterEventHandlers() {
      ProblemDataParameter.ValueChanged += ProblemDataParameter_ValueChanged;
      if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += ProblemData_Changed;

      TargetVariablesParameter.ValueChanged += TargetVariablesParameter_ValueChanged;
      if (TargetVariablesParameter.Value != null) TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;

      FunctionSetParameter.ValueChanged += FunctionSetParameter_ValueChanged;
      if (FunctionSetParameter.Value != null) FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;

      MaximumLengthParameter.Value.ValueChanged += MaximumLengthChanged;

      NumberOfLatentVariablesParameter.Value.ValueChanged += NumLatentVariablesChanged;
    }

    private void NumLatentVariablesChanged(object sender, EventArgs e) {
      UpdateGrammarAndEncoding();
    }

    private void MaximumLengthChanged(object sender, EventArgs e) {
      UpdateGrammarAndEncoding();
    }

    private void FunctionSetParameter_ValueChanged(object sender, EventArgs e) {
      // re-subscribe to the events of the new value object and rebuild the grammar
      FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
      UpdateGrammarAndEncoding();
    }

    private void CheckedFunctionsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
      UpdateGrammarAndEncoding();
    }

    private void TargetVariablesParameter_ValueChanged(object sender, EventArgs e) {
      // re-subscribe to the events of the new value object and rebuild the grammar
      TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
      UpdateGrammarAndEncoding();
    }

    private void CheckedTargetVariablesChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
      UpdateGrammarAndEncoding();
    }

    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
      ProblemDataParameter.Value.Changed += ProblemData_Changed;
      OnProblemDataChanged();
      OnReset();
    }

    private void ProblemData_Changed(object sender, EventArgs e) {
      OnProblemDataChanged();
      OnReset();
    }

    private void OnProblemDataChanged() {
      UpdateTargetVariables();        // implicitly updates other dependent parameters
      var handler = ProblemDataChanged;
      if (handler != null) handler(this, EventArgs.Empty);
    }

    #endregion

    #region helper

    private void InitAllParameters() {
      UpdateTargetVariables(); // implicitly updates the grammar and the encoding
    }

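    // The default function set contains only symbols for which InterpretRec
    // implements value and gradient calculation; '%' denotes protected division.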
    private ReadOnlyCheckedItemList<StringValue> CreateFunctionSet() {
      var l = new CheckedItemList<StringValue>();
      l.Add(new StringValue("+").AsReadOnly());
      l.Add(new StringValue("*").AsReadOnly());
      l.Add(new StringValue("%").AsReadOnly());
      l.Add(new StringValue("-").AsReadOnly());
      l.Add(new StringValue("sin").AsReadOnly());
      l.Add(new StringValue("cos").AsReadOnly());
      l.Add(new StringValue("sqr").AsReadOnly());
      return l.AsReadOnly();
    }

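    // Leaf-node classification: names starting with 'θ' are numeric parameters
    // (optimized via AutoDiff), names starting with 'λ' are latent variables,
    // and all remaining leaf nodes refer to input or target variables.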
    private static bool IsConstantNode(ISymbolicExpressionTreeNode n) {
      return n.Symbol.Name[0] == 'θ';
    }
    private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) {
      return n.Symbol.Name[0] == 'λ';
    }
    private static bool IsVariableNode(ISymbolicExpressionTreeNode n) {
      return (n.SubtreeCount == 0) && !IsConstantNode(n) && !IsLatentVariableNode(n);
    }

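    // Rebuilds the list of selectable target variables from the dataset and
    // preserves the checked state of variables that still exist.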
    private void UpdateTargetVariables() {
      var currentlySelectedVariables = TargetVariables.CheckedItems
        .OrderBy(i => i.Index)
        .Select(i => i.Value.Value)
        .ToArray();

      var newVariablesList = new CheckedItemList<StringValue>(ProblemData.Dataset.VariableNames.Select(str => new StringValue(str).AsReadOnly()).ToArray()).AsReadOnly();
      var matchingItems = newVariablesList.Where(item => currentlySelectedVariables.Contains(item.Value)).ToArray();
      foreach (var matchingItem in matchingItems) {
        newVariablesList.SetItemCheckedState(matchingItem, true);
      }
      TargetVariablesParameter.Value = newVariablesList;
    }

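    // One tree is encoded per checked target variable plus one per latent
    // variable; all trees share the same grammar and are limited only by length.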
    private void UpdateGrammarAndEncoding() {
      var encoding = new MultiEncoding();
      var g = CreateGrammar();
      foreach (var targetVar in TargetVariables.CheckedItems) {
        encoding = encoding.Add(new SymbolicExpressionTreeEncoding(targetVar.Value.Value + "_tree", g, MaximumLength, MaximumLength)); // only limit by length
      }
      for (int i = 1; i <= NumberOfLatentVariables; i++) {
        encoding = encoding.Add(new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength));
      }
      Encoding = encoding;
    }

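    // Example of an expression derivable from this grammar for a target y and
    // input x: θ1 * y + sin(θ2 * x); the θ terminals are placeholders whose
    // values are optimized and later substituted by TranslateTreeNode.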
    private ISymbolicExpressionGrammar CreateGrammar() {
      // whenever ProblemData is changed we create a new grammar with the necessary symbols
      var g = new SimpleSymbolicExpressionGrammar();
      var functionNames = FunctionSet.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      // sin, cos and sqr take a single argument; all remaining functions are binary
      var unaryFunctions = new[] { "sin", "cos", "sqr" };
      g.AddSymbols(functionNames.Where(fn => unaryFunctions.Contains(fn)).ToArray(), 1, 1);
      g.AddSymbols(functionNames.Where(fn => !unaryFunctions.Contains(fn)).ToArray(), 2, 2);

      // TODO
      //g.AddSymbols(new[] {
      //  "exp",
      //  "log", // log( <expr> ) // TODO: init a theta to ensure the value is always positive
      //  "exp_minus" // exp((-1) * <expr>)
      //}, 1, 1);

      foreach (var variableName in ProblemData.AllowedInputVariables.Union(TargetVariables.CheckedItems.Select(i => i.Value.Value)))
        g.AddTerminalSymbol(variableName);

      // generate symbols for numeric parameters for which the value is optimized using AutoDiff
      // we generate multiple symbols to balance the probability for selecting a numeric parameter in the generation of random trees
      var numericConstantsFactor = 2.0;
      for (int i = 0; i < numericConstantsFactor * (ProblemData.AllowedInputVariables.Count() + TargetVariables.CheckedItems.Count()); i++) {
        g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff
      }

      // generate symbols for latent variables
      for (int i = 1; i <= NumberOfLatentVariables; i++) {
        g.AddTerminalSymbol("λ" + i); // latent variable
      }

      return g;
    }

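    // Replaces each θ symbol by an indexed placeholder c_<idx> while keeping the
    // tree structure intact; parameterValues is passed along for symmetry with
    // TranslateTreeNode but is not read here.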
    private ISymbolicExpressionTreeNode FixParameters(ISymbolicExpressionTreeNode n, double[] parameterValues, ref int nextParIdx) {
      ISymbolicExpressionTreeNode translatedNode = null;
      if (n.Symbol is StartSymbol) {
        translatedNode = new StartSymbol().CreateTreeNode();
      } else if (n.Symbol is ProgramRootSymbol) {
        translatedNode = new ProgramRootSymbol().CreateTreeNode();
      } else if (n.Symbol.Name == "+") {
        translatedNode = new SimpleSymbol("+", 2).CreateTreeNode();
      } else if (n.Symbol.Name == "-") {
        translatedNode = new SimpleSymbol("-", 2).CreateTreeNode();
      } else if (n.Symbol.Name == "*") {
        translatedNode = new SimpleSymbol("*", 2).CreateTreeNode();
      } else if (n.Symbol.Name == "%") {
        translatedNode = new SimpleSymbol("%", 2).CreateTreeNode();
      } else if (n.Symbol.Name == "sin") {
        translatedNode = new SimpleSymbol("sin", 1).CreateTreeNode();
      } else if (n.Symbol.Name == "cos") {
        translatedNode = new SimpleSymbol("cos", 1).CreateTreeNode();
      } else if (n.Symbol.Name == "sqr") {
        translatedNode = new SimpleSymbol("sqr", 1).CreateTreeNode();
      } else if (IsConstantNode(n)) {
        translatedNode = new SimpleSymbol("c_" + nextParIdx, 0).CreateTreeNode();
        nextParIdx++;
      } else {
        translatedNode = new SimpleSymbol(n.Symbol.Name, n.SubtreeCount).CreateTreeNode();
      }
      foreach (var child in n.Subtrees) {
        translatedNode.AddSubtree(FixParameters(child, parameterValues, ref nextParIdx));
      }
      return translatedNode;
    }

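    // Translates a tree into the standard symbolic regression symbol set and
    // substitutes the optimized values for the θ placeholders, e.g. θ0 * y with
    // parameterValues[0] = 1.5 becomes 1.5 * y.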
    private ISymbolicExpressionTreeNode TranslateTreeNode(ISymbolicExpressionTreeNode n, double[] parameterValues, ref int nextParIdx) {
      ISymbolicExpressionTreeNode translatedNode = null;
      if (n.Symbol is StartSymbol) {
        translatedNode = new StartSymbol().CreateTreeNode();
      } else if (n.Symbol is ProgramRootSymbol) {
        translatedNode = new ProgramRootSymbol().CreateTreeNode();
      } else if (n.Symbol.Name == "+") {
        translatedNode = new Addition().CreateTreeNode();
      } else if (n.Symbol.Name == "-") {
        translatedNode = new Subtraction().CreateTreeNode();
      } else if (n.Symbol.Name == "*") {
        translatedNode = new Multiplication().CreateTreeNode();
      } else if (n.Symbol.Name == "%") {
        translatedNode = new Division().CreateTreeNode();
      } else if (n.Symbol.Name == "sin") {
        translatedNode = new Sine().CreateTreeNode();
      } else if (n.Symbol.Name == "cos") {
        translatedNode = new Cosine().CreateTreeNode();
      } else if (n.Symbol.Name == "sqr") {
        translatedNode = new Square().CreateTreeNode();
      } else if (IsConstantNode(n)) {
        var constNode = (ConstantTreeNode)new Constant().CreateTreeNode();
        constNode.Value = parameterValues[nextParIdx];
        nextParIdx++;
        translatedNode = constNode;
      } else {
        // assume a variable name
        var varName = n.Symbol.Name;
        var varNode = (VariableTreeNode)new Variable().CreateTreeNode();
        varNode.Weight = 1.0;
        varNode.VariableName = varName;
        translatedNode = varNode;
      }
      foreach (var child in n.Subtrees) {
        translatedNode.AddSubtree(TranslateTreeNode(child, parameterValues, ref nextParIdx));
      }
      return translatedNode;
    }
    #endregion

    #region Import & Export
    public void Load(IRegressionProblemData data) {
      Name = data.Name;
      Description = data.Description;
      ProblemData = data;
    }

    public IRegressionProblemData Export() {
      return ProblemData;
    }
    #endregion

  }
}