Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs @ 16275

Last change on this file since 16275 was 16275, checked in by gkronber, 5 years ago

#2925: changed random initialization of parameters and fixed two non-critical bugs in the implementation

File size: 58.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Linq;
26using HeuristicLab.Analysis;
27using HeuristicLab.Collections;
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Data;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
32using HeuristicLab.Optimization;
33using HeuristicLab.Parameters;
34using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
35using HeuristicLab.Problems.DataAnalysis;
36using HeuristicLab.Problems.DataAnalysis.Symbolic;
37using HeuristicLab.Problems.Instances;
38using Variable = HeuristicLab.Problems.DataAnalysis.Symbolic.Variable;
39
40namespace HeuristicLab.Problems.DynamicalSystemsModelling {
41
42
43
44
45  // Eine weitere Möglichkeit ist spline-smoothing der Daten (über Zeit) um damit für jede Zielvariable
46  // einen bereinigten (Rauschen) Wert und die Ableitung dy/dt für alle Beobachtungen zu bekommen
47  // danach kann man die Regression direkt für dy/dt anwenden (mit bereinigten y als inputs)
48  [Item("Dynamical Systems Modelling Problem", "TODO")]
49  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
50  [StorableClass]
51  public sealed class Problem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
    #region parameter names
    // Keys for the Parameters collection; used both when adding the parameters in
    // the constructor and when looking them up in the typed parameter properties.
    private const string ProblemDataParameterName = "Data";
    private const string TargetVariablesParameterName = "Target variables";
    private const string FunctionSetParameterName = "Function set";
    private const string MaximumLengthParameterName = "Size limit";
    private const string MaximumParameterOptimizationIterationsParameterName = "Max. parameter optimization iterations";
    private const string NumberOfLatentVariablesParameterName = "Number of latent variables";
    private const string NumericIntegrationStepsParameterName = "Steps for numeric integration";
    private const string TrainingEpisodesParameterName = "Training episodes";
    private const string OptimizeParametersForEpisodesParameterName = "Optimize parameters for episodes";
    private const string OdeSolverParameterName = "ODE Solver";
    #endregion
64
65    #region Parameter Properties
66    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }
67
68    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
69      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
70    }
71    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> TargetVariablesParameter {
72      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[TargetVariablesParameterName]; }
73    }
74    public IValueParameter<ReadOnlyCheckedItemList<StringValue>> FunctionSetParameter {
75      get { return (IValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[FunctionSetParameterName]; }
76    }
77    public IFixedValueParameter<IntValue> MaximumLengthParameter {
78      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumLengthParameterName]; }
79    }
80    public IFixedValueParameter<IntValue> MaximumParameterOptimizationIterationsParameter {
81      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumParameterOptimizationIterationsParameterName]; }
82    }
83    public IFixedValueParameter<IntValue> NumberOfLatentVariablesParameter {
84      get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfLatentVariablesParameterName]; }
85    }
86    public IFixedValueParameter<IntValue> NumericIntegrationStepsParameter {
87      get { return (IFixedValueParameter<IntValue>)Parameters[NumericIntegrationStepsParameterName]; }
88    }
89    public IValueParameter<ItemList<IntRange>> TrainingEpisodesParameter {
90      get { return (IValueParameter<ItemList<IntRange>>)Parameters[TrainingEpisodesParameterName]; }
91    }
92    public IFixedValueParameter<BoolValue> OptimizeParametersForEpisodesParameter {
93      get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersForEpisodesParameterName]; }
94    }
95    public IConstrainedValueParameter<StringValue> OdeSolverParameter {
96      get { return (IConstrainedValueParameter<StringValue>)Parameters[OdeSolverParameterName]; }
97    }
98    #endregion
99
100    #region Properties
101    public IRegressionProblemData ProblemData {
102      get { return ProblemDataParameter.Value; }
103      set { ProblemDataParameter.Value = value; }
104    }
105    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }
106
107    public ReadOnlyCheckedItemList<StringValue> TargetVariables {
108      get { return TargetVariablesParameter.Value; }
109    }
110
111    public ReadOnlyCheckedItemList<StringValue> FunctionSet {
112      get { return FunctionSetParameter.Value; }
113    }
114
115    public int MaximumLength {
116      get { return MaximumLengthParameter.Value.Value; }
117    }
118    public int MaximumParameterOptimizationIterations {
119      get { return MaximumParameterOptimizationIterationsParameter.Value.Value; }
120    }
121    public int NumberOfLatentVariables {
122      get { return NumberOfLatentVariablesParameter.Value.Value; }
123    }
124    public int NumericIntegrationSteps {
125      get { return NumericIntegrationStepsParameter.Value.Value; }
126    }
127    public IEnumerable<IntRange> TrainingEpisodes {
128      get { return TrainingEpisodesParameter.Value; }
129    }
130    public bool OptimizeParametersForEpisodes {
131      get { return OptimizeParametersForEpisodesParameter.Value.Value; }
132    }
133
134    public string OdeSolver {
135      get { return OdeSolverParameter.Value.Value; }
136      set {
137        var matchingValue = OdeSolverParameter.ValidValues.FirstOrDefault(v => v.Value == value);
138        if (matchingValue == null) throw new ArgumentOutOfRangeException();
139        else OdeSolverParameter.Value = matchingValue;
140      }
141    }
142
143    #endregion
144
    // Raised when the problem data changes; the raising logic is outside this excerpt
    // (presumably wired up in RegisterEventHandlers - confirm there).
    public event EventHandler ProblemDataChanged;

    public override bool Maximization {
      get { return false; } // we minimize NMSE
    }
150
    #region item cloning and persistence
    // persistence
    [StorableConstructor]
    private Problem(bool deserializing) : base(deserializing) { }

    // Backwards-compatibility hook: problems serialized before the
    // "Optimize parameters for episodes" parameter existed get it added here,
    // then event handlers are re-attached (they are not persisted).
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey(OptimizeParametersForEpisodesParameterName)) {
        Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
      }
      RegisterEventHandlers();
    }

    // cloning
    private Problem(Problem original, Cloner cloner)
      : base(original, cloner) {
      RegisterEventHandlers(); // event handlers are not cloned; attach them to the new instance
    }
    public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
    #endregion
170
    // Default constructor: registers all parameters with their default values,
    // wires up event handlers and initializes the encoding (InitAllParameters,
    // defined elsewhere in this file).
    public Problem()
      : base() {
      var targetVariables = new CheckedItemList<StringValue>().AsReadOnly(); // HACK: it would be better to provide a new class derived from IDataAnalysisProblem
      var functions = CreateFunctionSet();
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system. Use CSV import functionality to import data.", new RegressionProblemData()));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(TargetVariablesParameterName, "Target variables (overrides setting in ProblemData)", targetVariables));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemList<StringValue>>(FunctionSetParameterName, "The list of allowed functions", functions));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumLengthParameterName, "The maximally allowed length of each expression. Set to a small value (5 - 25). Default = 10", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumParameterOptimizationIterationsParameterName, "The maximum number of iterations for optimization of parameters (using L-BFGS). More iterations makes the algorithm slower, fewer iterations might prevent convergence in the optimization scheme. Default = 100", new IntValue(100)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumberOfLatentVariablesParameterName, "Latent variables (unobserved variables) allow us to produce expressions which are integrated up and can be used in other expressions. They are handled similarly to target variables in forward simulation / integration. The difference to target variables is that there are no data to which the calculated values of latent variables are compared. Set to a small value (0 .. 5) as necessary (default = 0)", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps makes the algorithm slower, less steps worsens the accuracy of the numeric integration scheme.", new IntValue(10)));
      Parameters.Add(new ValueParameter<ItemList<IntRange>>(TrainingEpisodesParameterName, "A list of ranges that should be used for training, each range represents an independent episode. This overrides the TrainingSet parameter in ProblemData.", new ItemList<IntRange>()));
      Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));

      // the set of supported ODE solvers; the first entry is the default value
      var solversStr = new string[] { "HeuristicLab", "CVODES" };
      var solvers = new ItemSet<StringValue>(
        solversStr.Select(s => new StringValue(s).AsReadOnly())
        );
      Parameters.Add(new ConstrainedValueParameter<StringValue>(OdeSolverParameterName, "The solver to use for solving the initial value ODE problems", solvers, solvers.First()));

      RegisterEventHandlers();
      InitAllParameters();

      // TODO: do not clear selection of target variables when the input variables are changed (keep selected target variables)
      // TODO: UI hangs when selecting / deselecting input variables because the encoding is updated on each item
      // TODO: use training range as default training episode
      // TODO: write back optimized parameters to solution?

    }
200
201    public override double Evaluate(Individual individual, IRandom random) {
202      var trees = individual.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual
203      // write back optimized parameters to tree nodes instead of the separate OptTheta variable
204      // retreive optimized parameters from nodes?
205
206      if (OptimizeParametersForEpisodes) {
207        int eIdx = 0;
208        double totalNMSE = 0.0;
209        int totalSize = 0;
210        foreach (var episode in TrainingEpisodes) {
211          double[] optTheta;
212          double nmse;
213          OptimizeForEpisodes(trees, random, new[] { episode }, out optTheta, out nmse);
214          individual["OptTheta_" + eIdx] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
215          eIdx++;
216          totalNMSE += nmse * episode.Size;
217          totalSize += episode.Size;
218        }
219        return totalNMSE / totalSize;
220      } else {
221        double[] optTheta;
222        double nmse;
223        OptimizeForEpisodes(trees, random, TrainingEpisodes, out optTheta, out nmse);
224        individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
225        return nmse;
226      }
227    }
228
    // Optimizes the numeric parameters (constant nodes) of all trees for the given
    // training episodes using L-BFGS. Returns the optimized parameter vector and the
    // achieved normalized mean squared error; failures are mapped to a large error value.
    private void OptimizeForEpisodes(
      ISymbolicExpressionTree[] trees,
      IRandom random,
      IEnumerable<IntRange> episodes,
      out double[] optTheta,
      out double nmse) {
      var rows = episodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start)).ToArray();
      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
      var targetValues = new double[rows.Length, targetVars.Length];

      // collect values of all target variables (one column per target variable)
      var colIdx = 0;
      foreach (var targetVar in targetVars) {
        int rowIdx = 0;
        foreach (var value in problemData.Dataset.GetDoubleValues(targetVar, rows)) {
          targetValues[rowIdx, colIdx] = value;
          rowIdx++;
        }
        colIdx++;
      }

      // NOTE: the order of values in parameter matches prefix order of constant nodes in trees
      var paramNodes = new List<ISymbolicExpressionTreeNode>();
      foreach (var t in trees) {
        foreach (var n in t.IterateNodesPrefix()) {
          if (IsConstantNode(n))
            paramNodes.Add(n);
        }
      }
      // init params randomly from Unif(-1e-2, 1e-2)
      var theta = paramNodes.Select(_ => random.NextDouble() * 2.0e-2 - 1.0e-2).ToArray();

      optTheta = new double[0];
      if (theta.Length > 0) {
        alglib.minlbfgsstate state;
        alglib.minlbfgsreport report;
        // rank of the limited-memory approximation: min(#parameters, 5)
        alglib.minlbfgscreate(Math.Min(theta.Length, 5), theta, out state);
        // all epsilon stopping conditions are 0 -> only the iteration limit stops the optimizer
        alglib.minlbfgssetcond(state, 0.0, 0.0, 0.0, MaximumParameterOptimizationIterations);
        //alglib.minlbfgssetgradientcheck(state, 1e-6);
        alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null,
          new object[] { trees, targetVars, problemData, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver }); //TODO: create a type

        alglib.minlbfgsresults(state, out optTheta, out report);

        /* (ALGLIB API documentation)
         *
         *         L-BFGS algorithm results

          INPUT PARAMETERS:
              State   -   algorithm state

          OUTPUT PARAMETERS:
              X       -   array[0..N-1], solution
              Rep     -   optimization report:
                          * Rep.TerminationType completion code:
                              * -7    gradient verification failed.
                                      See MinLBFGSSetGradientCheck() for more information.
                              * -2    rounding errors prevent further improvement.
                                      X contains best point found.
                              * -1    incorrect parameters were specified
                              *  1    relative function improvement is no more than
                                      EpsF.
                              *  2    relative step is no more than EpsX.
                              *  4    gradient norm is no more than EpsG
                              *  5    MaxIts steps was taken
                              *  7    stopping conditions are too stringent,
                                      further improvement is impossible
                          * Rep.IterationsCount contains iterations count
                          * NFEV contains number of function calculations
         */
        // negative termination types indicate optimizer failure -> return a large error value
        if (report.terminationtype < 0) { nmse = 10E6; return; }
      }

      // perform evaluation for optimal theta to get quality value
      double[] grad = new double[optTheta.Length];
      nmse = double.NaN;
      EvaluateObjectiveAndGradient(optTheta, ref nmse, grad,
        new object[] { trees, targetVars, problemData, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver });
      if (double.IsNaN(nmse) || double.IsInfinity(nmse)) { nmse = 10E6; return; } // return a large value (TODO: be consistent by using NMSE)
    }
311
    // Objective callback for the ALGLIB L-BFGS optimizer: computes the NMSE over all
    // predicted target values (f) and its gradient w.r.t. the parameter vector x (grad).
    // obj carries the remaining evaluation context as an object[] whose element order
    // must match the array built in OptimizeForEpisodes.
    private static void EvaluateObjectiveAndGradient(double[] x, ref double f, double[] grad, object obj) {
      // unpack the positional context array
      var trees = (ISymbolicExpressionTree[])((object[])obj)[0];
      var targetVariables = (string[])((object[])obj)[1];
      var problemData = (IRegressionProblemData)((object[])obj)[2];
      var targetValues = (double[,])((object[])obj)[3];
      var episodes = (IntRange[])((object[])obj)[4];
      var numericIntegrationSteps = (int)((object[])obj)[5];
      var latentVariables = (string[])((object[])obj)[6];
      var odeSolver = (string)((object[])obj)[7];


      Tuple<double, Vector>[][] predicted = null; // one array of predictions for each episode
      // TODO: stop integration early for diverging solutions
      predicted = Integrate(
          trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
          problemData.Dataset,
          problemData.AllowedInputVariables.ToArray(),
          targetVariables,
          latentVariables,
          episodes,
          x,
          odeSolver,
          numericIntegrationSteps).ToArray();

      // Integrate stops an episode early when a value becomes NaN/infinite; in that case
      // fewer rows than expected are produced and the candidate is assigned the worst
      // objective value with a zero gradient.
      if (predicted.Length != targetValues.GetLength(0)) {
        f = double.MaxValue;
        Array.Clear(grad, 0, grad.Length);
        return;
      }

      // for normalized MSE = 1/variance(t) * MSE(t, pred)
      // TODO: Perf. (by standardization of target variables before evaluation of all trees)
      // NOTE(review): a constant target column has zero variance, making invVar infinite
      // and f/grad non-finite - confirm target variables are never constant over the episodes.
      var invVar = Enumerable.Range(0, targetVariables.Length)
        .Select(c => Enumerable.Range(0, targetValues.GetLength(0)).Select(row => targetValues[row, c])) // column vectors
        .Select(vec => vec.Variance())
        .Select(v => 1.0 / v)
        .ToArray();

      // objective function is NMSE
      f = 0.0;
      int n = predicted.Length;
      double invN = 1.0 / n;
      var g = Vector.Zero;
      int r = 0;
      foreach (var y_pred in predicted) {
        for (int c = 0; c < y_pred.Length; c++) {

          var y_pred_f = y_pred[c].Item1; // predicted value
          var y = targetValues[r, c];     // observed value

          var res = (y - y_pred_f);
          var ressq = res * res;
          f += ressq * invN * invVar[c];
          // chain rule for the squared residual; Item2 is the sensitivity of the
          // prediction w.r.t. the parameters (propagated forward by Integrate)
          g += -2.0 * res * y_pred[c].Item2 * invN * invVar[c];
        }
        r++;
      }

      g.CopyTo(grad);
    }
372
    // Produces analysis results for the best individual of the current generation:
    // prediction charts for training (and test, in the global-parameters case) and the
    // models themselves (original and, where applicable, simplified trees).
    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
      base.Analyze(individuals, qualities, results, random);

      // make sure all result entries exist before they are filled in below
      if (!results.ContainsKey("Prediction (training)")) {
        results.Add(new Result("Prediction (training)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Prediction (test)")) {
        results.Add(new Result("Prediction (test)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Models")) {
        results.Add(new Result("Models", typeof(VariableCollection)));
      }

      var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
      var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual

      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees

      var trainingList = new ItemList<DataTable>();

      if (OptimizeParametersForEpisodes) {
        // per-episode parameters were stored by Evaluate under "OptTheta_<episodeIdx>"
        var eIdx = 0;
        var trainingPredictions = new List<Tuple<double, Vector>[][]>();
        foreach (var episode in TrainingEpisodes) {
          var episodes = new[] { episode };
          var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta_" + eIdx]).ToArray(); // see evaluate
          var trainingPrediction = Integrate(
                                   trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
                                   problemData.Dataset,
                                   problemData.AllowedInputVariables.ToArray(),
                                   targetVars,
                                   latentVariables,
                                   episodes,
                                   optTheta,
                                   OdeSolver,
                                   NumericIntegrationSteps).ToArray();
          trainingPredictions.Add(trainingPrediction);
          eIdx++;
        }

        // only for actual target values
        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
        // one chart per target variable: actual vs. predicted values
        for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
          var targetVar = targetVars[colIdx];
          var trainingDataTable = new DataTable(targetVar + " prediction (training)");
          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPredictions.SelectMany(arr => arr.Select(row => row[colIdx].Item1)).ToArray());
          trainingDataTable.Rows.Add(actualValuesRow);
          trainingDataTable.Rows.Add(predictedValuesRow);
          trainingList.Add(trainingDataTable);
        }
        results["Prediction (training)"].Value = trainingList.AsReadOnly();
        // NOTE(review): in this branch the "Prediction (test)" result created above is
        // never filled in and no simplified models are produced - confirm this is intended.

        var models = new VariableCollection();

        foreach (var tup in targetVars.Zip(trees, Tuple.Create)) {
          var targetVarName = tup.Item1;
          var tree = tup.Item2;

          var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
        }
        results["Models"].Value = models;
      } else {
        // one global parameter vector, stored by Evaluate under "OptTheta"
        var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta"]).ToArray(); // see evaluate
        var trainingPrediction = Integrate(
                                   trees,  // we assume trees contain expressions for the change of each target variable over time dy/dt
                                   problemData.Dataset,
                                   problemData.AllowedInputVariables.ToArray(),
                                   targetVars,
                                   latentVariables,
                                   TrainingEpisodes,
                                   optTheta,
                                   OdeSolver,
                                   NumericIntegrationSteps).ToArray();
        // only for actual target values
        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
        for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
          var targetVar = targetVars[colIdx];
          var trainingDataTable = new DataTable(targetVar + " prediction (training)");
          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray());
          trainingDataTable.Rows.Add(actualValuesRow);
          trainingDataTable.Rows.Add(predictedValuesRow);
          trainingList.Add(trainingDataTable);
        }
        // TODO: DRY for training and test
        var testList = new ItemList<DataTable>();
        var testRows = ProblemData.TestIndices.ToArray();
        var testPrediction = Integrate(
         trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
         problemData.Dataset,
         problemData.AllowedInputVariables.ToArray(),
         targetVars,
         latentVariables,
         new IntRange[] { ProblemData.TestPartition },
         optTheta,
         OdeSolver,
         NumericIntegrationSteps).ToArray();

        for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
          var targetVar = targetVars[colIdx];
          var testDataTable = new DataTable(targetVar + " prediction (test)");
          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
          testDataTable.Rows.Add(actualValuesRow);
          testDataTable.Rows.Add(predictedValuesRow);
          testList.Add(testDataTable);
        }

        results["Prediction (training)"].Value = trainingList.AsReadOnly();
        results["Prediction (test)"].Value = testList.AsReadOnly();
        #region simplification of models
        // TODO the dependency of HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal
        var models = new VariableCollection();    // to store target var names and original version of tree

        // nextParIdx walks through optTheta across all trees (shared parameter vector)
        int nextParIdx = 0;
        foreach (var tup in targetVars.Zip(trees, Tuple.Create)) {
          var targetVarName = tup.Item1;
          var tree = tup.Item2;

          // when we reference HeuristicLab.Problems.DataAnalysis.Symbolic we can translate symbols
          var shownTree = new SymbolicExpressionTree(TranslateTreeNode(tree.Root, optTheta, ref nextParIdx));

          // (alternative approach kept from the original for reference)
          // var shownTree = (SymbolicExpressionTree)tree.Clone();
          // var constantsNodeOrig = tree.IterateNodesPrefix().Where(IsConstantNode);
          // var constantsNodeShown = shownTree.IterateNodesPrefix().Where(IsConstantNode);
          //
          // foreach (var n in constantsNodeOrig.Zip(constantsNodeShown, (original, shown) => new { original, shown })) {
          //   double constantsVal = optTheta[nodeIdx[n.original]];
          //
          //   ConstantTreeNode replacementNode = new ConstantTreeNode(new Constant()) { Value = constantsVal };
          //
          //   var parentNode = n.shown.Parent;
          //   int replacementIndex = parentNode.IndexOfSubtree(n.shown);
          //   parentNode.RemoveSubtree(replacementIndex);
          //   parentNode.InsertSubtree(replacementIndex, replacementNode);
          // }

          // store both the original and a simplified version of each model
          var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
          models.Add(origTreeVar);
          var simplifiedTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(simplified)");
          simplifiedTreeVar.Value = TreeSimplifier.Simplify(shownTree);
          models.Add(simplifiedTreeVar);

        }
        results["Models"].Value = models;
        #endregion
      }
    }
528
529
530    #region interpretation
531
    // the following uses auto-diff to calculate the gradient w.r.t. the parameters forward in time.
    // this is basically the method described in Gronwall T. Note on the derivatives with respect to a parameter of the solutions of a system of differential equations. Ann. Math. 1919;20:292–296.

    // a comparison of three potential calculation methods for the gradient is given in:
    // Sengupta, B., Friston, K. J., & Penny, W. D. (2014). Efficient gradient computation for dynamical models. Neuroimage, 98(100), 521–527. http://doi.org/10.1016/j.neuroimage.2014.04.040
    // "Our comparison establishes that the adjoint method is computationally more efficient for numerical estimation of parametric gradients
    // for state-space models — both linear and non-linear, as in the case of a dynamical causal model (DCM)"

    // for a solver with the necessary features see: https://computation.llnl.gov/projects/sundials/cvodes

    // Numerically integrates the ODE system defined by trees forward over each episode.
    // Yields, per dataset row, one (value, sensitivity) tuple for every target variable,
    // where the Vector is the gradient of the value w.r.t. parameterValues. The first row
    // of each episode is the observed value from the dataset with a zero gradient; an
    // episode is terminated early when any target value becomes NaN or infinite.
    private static IEnumerable<Tuple<double, Vector>[]> Integrate(
      ISymbolicExpressionTree[] trees, IDataset dataset, string[] inputVariables, string[] targetVariables, string[] latentVariables, IEnumerable<IntRange> episodes,
      double[] parameterValues,
      string odeSolver, int numericIntegrationSteps = 100) {

      // TODO: numericIntegrationSteps is only relevant for the HeuristicLab solver

      foreach (var episode in episodes) {
        var rows = Enumerable.Range(episode.Start, episode.End - episode.Start);
        // return first value as stored in the dataset
        yield return targetVariables
          .Select(targetVar => Tuple.Create(dataset.GetDoubleValue(targetVar, rows.First()), Vector.Zero))
          .ToArray();

        // integrate forward starting with known values for the target in t0

        // current state: variable name -> (value, gradient w.r.t. parameters)
        var variableValues = new Dictionary<string, Tuple<double, Vector>>();
        var t0 = rows.First();
        foreach (var varName in inputVariables) {
          variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
        }
        foreach (var varName in targetVariables) {
          variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
        }
        // add value entries for latent variables which are also integrated
        foreach (var latentVar in latentVariables) {
          variableValues.Add(latentVar, Tuple.Create(0.0, Vector.Zero)); // we don't have observations for latent variables -> assume zero as starting value TODO
        }
        var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must coincide with the order of trees in the encoding

        var prevT = rows.First(); // TODO: here we should use a variable for t if it is available. Right now we assume equidistant measurements.
        foreach (var t in rows.Skip(1)) {
          // advance the state in-place from prevT to t using the selected solver
          if (odeSolver == "HeuristicLab")
            IntegrateHL(trees, calculatedVariables, variableValues, parameterValues, numericIntegrationSteps);
          else if (odeSolver == "CVODES")
            IntegrateCVODES(trees, calculatedVariables, variableValues, parameterValues, t - prevT);
          else throw new InvalidOperationException("Unknown ODE solver " + odeSolver);
          prevT = t;

          // This check doesn't work with the HeuristicLab integrator if there are input variables
          //if (variableValues.Count == targetVariables.Length) {
          // only return the target variables for calculation of errors
          var res = targetVariables
            .Select(targetVar => variableValues[targetVar])
            .ToArray();
          // stop this episode early when the solution diverges
          if (res.Any(ri => double.IsNaN(ri.Item1) || double.IsInfinity(ri.Item1))) yield break;
          yield return res;
          //} else {
          //  yield break; // stop early on problems
          //}


          // update for next time step: refresh input variables from the dataset
          foreach (var varName in inputVariables) {
            variableValues[varName] = Tuple.Create(dataset.GetDoubleValue(varName, t), Vector.Zero);
          }
        }
      }
    }
601
602
    /// <summary>
    ///  Here we use CVODES to solve the ODE. Forward sensitivities are used to calculate the gradient for parameter optimization
    /// </summary>
    /// <param name="trees">Each equation in the ODE represented as a tree</param>
    /// <param name="calculatedVariables">The names of the calculated variables</param>
    /// <param name="variableValues">The start values of the calculated variables as well as their sensitivities over parameters; updated in place to y(t0+t). Cleared to indicate solver failure.</param>
    /// <param name="parameterValues">The current parameter values</param>
    /// <param name="t">The time t up to which we need to integrate.</param>
    private static void IntegrateCVODES(
      ISymbolicExpressionTree[] trees, // f(y,p) in tree representation
      string[] calculatedVariables, // names of elements of y
      Dictionary<string, Tuple<double, Vector>> variableValues,  //  y (input and output) input: y(t0), output: y(t0+t)
      double[] parameterValues, // p
      double t // duration t for which we want to integrate
      ) {

      // the RHS of the ODE
      // dy/dt = f(y_t,x_t,p)
      CVODES.CVRhsFunc f = CreateOdeRhs(trees, calculatedVariables, parameterValues);
      // the Jacobian ∂f/∂y
      CVODES.CVDlsJacFunc jac = CreateJac(trees, calculatedVariables, parameterValues);

      // the RHS for the forward sensitivities (∂f/∂y)s_i(t) + ∂f/∂p_i
      CVODES.CVSensRhsFn sensF = CreateSensitivityRhs(trees, calculatedVariables, parameterValues);

      // setup solver
      // all native handles are initialized to zero so that the finally block below can
      // safely free only what was actually allocated
      int numberOfEquations = trees.Length;
      IntPtr y = IntPtr.Zero;
      IntPtr cvode_mem = IntPtr.Zero;
      IntPtr A = IntPtr.Zero;
      IntPtr yS0 = IntPtr.Zero;
      IntPtr linearSolver = IntPtr.Zero;
      var ns = parameterValues.Length; // number of parameters

      try {
        y = CVODES.N_VNew_Serial(numberOfEquations);
        // init y to current values of variables
        // y must be initialized before calling CVodeInit
        for (int i = 0; i < calculatedVariables.Length; i++) {
          CVODES.NV_Set_Ith_S(y, i, variableValues[calculatedVariables[i]].Item1);
        }

        cvode_mem = CVODES.CVodeCreate(CVODES.MultistepMethod.CV_ADAMS, CVODES.NonlinearSolverIteration.CV_FUNCTIONAL);

        var flag = CVODES.CVodeInit(cvode_mem, f, 0.0, y);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        double relTol = 1.0e-2;
        double absTol = 1.0;
        flag = CVODES.CVodeSStolerances(cvode_mem, relTol, absTol);  // TODO: probably need to adjust absTol per variable
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        A = CVODES.SUNDenseMatrix(numberOfEquations, numberOfEquations);
        Debug.Assert(A != IntPtr.Zero);

        linearSolver = CVODES.SUNDenseLinearSolver(y, A);
        Debug.Assert(linearSolver != IntPtr.Zero);

        flag = CVODES.CVDlsSetLinearSolver(cvode_mem, linearSolver, A);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        flag = CVODES.CVDlsSetJacFn(cvode_mem, jac);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        yS0 = CVODES.N_VCloneVectorArray_Serial(ns, y); // clone the output vector for each parameter
        unsafe {
          // set to initial sensitivities supplied by caller
          // yS0 is a native array of N_Vector pointers (one per parameter)
          for (int pIdx = 0; pIdx < ns; pIdx++) {
            var yS0_i = *((IntPtr*)yS0.ToPointer() + pIdx);
            for (var varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
              CVODES.NV_Set_Ith_S(yS0_i, varIdx, variableValues[calculatedVariables[varIdx]].Item2[pIdx]);
            }
          }
        }

        flag = CVODES.CVodeSensInit(cvode_mem, ns, CVODES.CV_SIMULTANEOUS, sensF, yS0);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        // estimate sensitivity tolerances from the state tolerances
        flag = CVODES.CVodeSensEEtolerances(cvode_mem);
        Debug.Assert(CVODES.CV_SUCCESS == flag);

        // make one forward integration step
        double tout = 0.0; // first output time
        flag = CVODES.CVode(cvode_mem, t, y, ref tout, CVODES.CV_NORMAL);
        if (flag == CVODES.CV_SUCCESS) {
          // NOTE(review): exact floating-point comparison; CV_NORMAL is expected to return
          // exactly at the requested time — confirm against the CVODES documentation
          Debug.Assert(t == tout);

          // get sensitivities
          flag = CVODES.CVodeGetSens(cvode_mem, ref tout, yS0);
          Debug.Assert(CVODES.CV_SUCCESS == flag);

          // update variableValues based on integration results
          for (int varIdx = 0; varIdx < calculatedVariables.Length; varIdx++) {
            var yi = CVODES.NV_Get_Ith_S(y, varIdx);
            var gArr = new double[parameterValues.Length];
            for (var pIdx = 0; pIdx < parameterValues.Length; pIdx++) {
              unsafe {
                var yS0_pi = *((IntPtr*)yS0.ToPointer() + pIdx);
                gArr[pIdx] = CVODES.NV_Get_Ith_S(yS0_pi, varIdx);
              }
            }
            variableValues[calculatedVariables[varIdx]] = Tuple.Create(yi, new Vector(gArr));
          }
        } else {
          variableValues.Clear();   // indicate problems by not returning new values
        }

        // cleanup all allocated objects
      } finally {
        if (y != IntPtr.Zero) CVODES.N_VDestroy_Serial(y);
        if (cvode_mem != IntPtr.Zero) CVODES.CVodeFree(ref cvode_mem);
        if (linearSolver != IntPtr.Zero) CVODES.SUNLinSolFree(linearSolver);
        if (A != IntPtr.Zero) CVODES.SUNMatDestroy(A);
        if (yS0 != IntPtr.Zero) CVODES.N_VDestroyVectorArray_Serial(yS0, ns);
      }
    }
719
720
721    private static CVODES.CVRhsFunc CreateOdeRhs(
722      ISymbolicExpressionTree[] trees,
723      string[] calculatedVariables,
724      double[] parameterValues) {
725      // we don't need to calculate a gradient here -> no nodes are selected for
726      // --> no nodes are selected to be included in the gradient calculation
727      var nodeIdx = new Dictionary<ISymbolicExpressionTreeNode, int>();
728      return (double t,
729              IntPtr y, // N_Vector, current value of y (input)
730              IntPtr ydot, // N_Vector (calculated value of y' (output)
731              IntPtr user_data // optional user data, (unused here)
732              ) => {
733                // TODO: perf
734                var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
735
736                int pIdx = 0;
737                foreach (var tree in trees) {
738                  foreach (var n in tree.IterateNodesPrefix()) {
739                    if (IsConstantNode(n)) {
740                      nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we do not need a gradient
741                      pIdx++;
742                    } else if (n.SubtreeCount == 0) {
743                      // for variables and latent variables get the value from variableValues
744                      var varName = n.Symbol.Name;
745                      var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
746                      if (varIdx < 0) throw new InvalidProgramException();
747                      var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
748                      nodeValues.Add(n, Tuple.Create(y_i, Vector.Zero)); // no gradient needed
749                    }
750                  }
751                }
752                for (int i = 0; i < trees.Length; i++) {
753                  var tree = trees[i];
754                  var res_i = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
755                  CVODES.NV_Set_Ith_S(ydot, i, res_i.Item1);
756                }
757                return 0;
758              };
759    }
760
761    private static CVODES.CVDlsJacFunc CreateJac(
762      ISymbolicExpressionTree[] trees,
763      string[] calculatedVariables,
764      double[] parameterValues) {
765
766      return (
767        double t, // current time (input)
768        IntPtr y, // N_Vector, current value of y (input)
769        IntPtr fy, // N_Vector, current value of f (input)
770        IntPtr Jac, // SUNMatrix ∂f/∂y (output, rows i contains are ∂f_i/∂y vector)
771        IntPtr user_data, // optional (unused here)
772        IntPtr tmp1, // N_Vector, optional (unused here)
773        IntPtr tmp2, // N_Vector, optional (unused here)
774        IntPtr tmp3 // N_Vector, optional (unused here)
775      ) => {
776        // here we need to calculate partial derivatives for the calculated variables y
777        var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
778        int pIdx = 0;
779        foreach (var tree in trees) {
780          foreach (var n in tree.IterateNodesPrefix()) {
781            if (IsConstantNode(n)) {
782              nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], Vector.Zero)); // here we need a gradient over y which is zero for parameters
783              pIdx++;
784            } else if (n.SubtreeCount == 0) {
785              // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
786              var varName = n.Symbol.Name;
787              var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
788              if (varIdx < 0) throw new InvalidProgramException();
789
790              var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
791              var gArr = new double[CVODES.NV_LENGTH_S(y)]; // backing array
792              gArr[varIdx] = 1.0;
793              var g = new Vector(gArr);
794              nodeValues.Add(n, Tuple.Create(y_i, g));
795            }
796          }
797        }
798
799        for (int i = 0; i < trees.Length; i++) {
800          var tree = trees[i];
801          var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
802          var g = res.Item2;
803          for (int j = 0; j < calculatedVariables.Length; j++) {
804            CVODES.SUNDenseMatrix_Set(Jac, i, j, g[j]);
805          }
806        }
807        return 0; // on success
808      };
809    }
810
811
812    // to calculate sensitivities RHS for all equations at once
813    // must compute (∂f/∂y)s_i(t) + ∂f/∂p_i and store in ySdot.
814    // Index i refers to parameters, dimensionality of matrix and vectors is number of equations
815    private static CVODES.CVSensRhsFn CreateSensitivityRhs(ISymbolicExpressionTree[] trees, string[] calculatedVariables, double[] parameterValues) {
816      return (
817              int Ns, // number of parameters
818              double t, // current time
819              IntPtr y, // N_Vector y(t) (input)
820              IntPtr ydot, // N_Vector dy/dt(t) (input)
821              IntPtr yS, // N_Vector*, one vector for each parameter (input)
822              IntPtr ySdot, // N_Vector*, one vector for each parameter (output)
823              IntPtr user_data, // optional (unused here)
824              IntPtr tmp1, // N_Vector, optional (unused here)
825              IntPtr tmp2 // N_Vector, optional (unused here)
826        ) => {
827          // here we need to calculate partial derivatives for the calculated variables y as well as for the parameters
828          var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
829          var d = calculatedVariables.Length + parameterValues.Length; // dimensionality of gradient
830          // first collect variable values
831          foreach (var tree in trees) {
832            foreach (var n in tree.IterateNodesPrefix()) {
833              if (IsVariableNode(n)) {
834                // for variables and latent variables we use values supplied in y and init gradient vectors accordingly
835                var varName = n.Symbol.Name;
836                var varIdx = Array.IndexOf(calculatedVariables, varName); // TODO: perf!
837                if (varIdx < 0) throw new InvalidProgramException();
838
839                var y_i = CVODES.NV_Get_Ith_S(y, (long)varIdx);
840                var gArr = new double[d]; // backing array
841                gArr[varIdx] = 1.0;
842                var g = new Vector(gArr);
843                nodeValues.Add(n, Tuple.Create(y_i, g));
844              }
845            }
846          }
847          // then collect constants
848          int pIdx = 0;
849          foreach (var tree in trees) {
850            foreach (var n in tree.IterateNodesPrefix()) {
851              if (IsConstantNode(n)) {
852                var gArr = new double[d];
853                gArr[calculatedVariables.Length + pIdx] = 1.0;
854                var g = new Vector(gArr);
855                nodeValues.Add(n, Tuple.Create(parameterValues[pIdx], g));
856                pIdx++;
857              }
858            }
859          }
860          // gradient vector is [∂f/∂y_1, ∂f/∂y_2, ... ∂f/∂yN, ∂f/∂p_1 ... ∂f/∂p_K]
861
862
863          for (pIdx = 0; pIdx < Ns; pIdx++) {
864            unsafe {
865              var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
866              CVODES.N_VConst_Serial(0.0, sDot_pi);
867            }
868          }
869
870          for (int i = 0; i < trees.Length; i++) {
871            var tree = trees[i];
872            var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
873            var g = res.Item2;
874
875
876            // update ySdot = (∂f/∂y)s_i(t) + ∂f/∂p_i
877
878            for (pIdx = 0; pIdx < Ns; pIdx++) {
879              unsafe {
880                var sDot_pi = *((IntPtr*)ySdot.ToPointer() + pIdx);
881                var s_pi = *((IntPtr*)yS.ToPointer() + pIdx);
882
883                var v = CVODES.NV_Get_Ith_S(sDot_pi, i);
884                // (∂f/∂y)s_i(t)
885                var p = 0.0;
886                for (int yIdx = 0; yIdx < calculatedVariables.Length; yIdx++) {
887                  p += g[yIdx] * CVODES.NV_Get_Ith_S(s_pi, yIdx);
888                }
889                // + ∂f/∂p_i
890                CVODES.NV_Set_Ith_S(sDot_pi, i, v + p + g[calculatedVariables.Length + pIdx]);
891              }
892            }
893
894          }
895          return 0; // on success
896        };
897    }
898
    // Simple fixed-step ODE solver: advances all calculated variables (and their parameter
    // sensitivities) by one unit of time using numericIntegrationSteps explicit Euler steps.
    private static void IntegrateHL(
      ISymbolicExpressionTree[] trees,
      string[] calculatedVariables, // names of integrated variables
      Dictionary<string, Tuple<double, Vector>> variableValues, //y (input and output) input: y(t0), output: y(t0+1)
      double[] parameterValues,
      int numericIntegrationSteps) {

      var nodeValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();

      // the nodeValues for parameters are constant over time
      // TODO: this needs to be done only once for each iteration in gradient descent (whenever parameter values change)
      // NOTE: the order must be the same as above (prefix order for nodes)
      int pIdx = 0;
      foreach (var tree in trees) {
        foreach (var node in tree.Root.IterateNodesPrefix()) {
          if (IsConstantNode(node)) {
            // parameter node: gradient is the unit vector in this parameter's dimension
            var gArr = new double[parameterValues.Length]; // backing array
            gArr[pIdx] = 1.0;
            var g = new Vector(gArr);
            nodeValues.Add(node, new Tuple<double, Vector>(parameterValues[pIdx], g));
            pIdx++;
          } else if (node.SubtreeCount == 0) {
            // for (latent) variables get the values from variableValues
            nodeValues.Add(node, variableValues[node.Symbol.Name]);
          }
        }
      }


      double h = 1.0 / numericIntegrationSteps;
      for (int step = 0; step < numericIntegrationSteps; step++) {
        // evaluate all RHS expressions at the current state before applying any updates
        var deltaValues = new Dictionary<string, Tuple<double, Vector>>();
        for (int i = 0; i < trees.Length; i++) {
          var tree = trees[i];
          var targetVarName = calculatedVariables[i];

          // Root.GetSubtree(0).GetSubtree(0) skips programRoot and startSymbol
          var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), nodeValues);
          deltaValues.Add(targetVarName, res);
        }

        // update variableValues for next step (explicit forward Euler: y += h * f(y);
        // the previous comment said "trapezoid integration" but the update is first-order Euler)
        foreach (var kvp in deltaValues) {
          var oldVal = variableValues[kvp.Key];
          var newVal = Tuple.Create(
            oldVal.Item1 + h * kvp.Value.Item1,
            oldVal.Item2 + h * kvp.Value.Item2
          );
          variableValues[kvp.Key] = newVal;
        }
        // update nodeValues from variableValues
        // TODO: perf using dictionary with list of nodes for each variable
        foreach (var tree in trees) {
          foreach (var node in tree.Root.IterateNodesPrefix().Where(n => IsVariableNode(n))) {
            var varName = node.Symbol.Name;
            nodeValues[node] = variableValues[varName];
          }
        }
      }
    }
960
961    private static Tuple<double, Vector> InterpretRec(
962      ISymbolicExpressionTreeNode node,
963      Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>> nodeValues      // contains value and gradient vector for a node (variables and constants only)
964        ) {
965
966      switch (node.Symbol.Name) {
967        case "+": {
968            var l = InterpretRec(node.GetSubtree(0), nodeValues);
969            var r = InterpretRec(node.GetSubtree(1), nodeValues);
970
971            return Tuple.Create(l.Item1 + r.Item1, l.Item2 + r.Item2);
972          }
973        case "*": {
974            var l = InterpretRec(node.GetSubtree(0), nodeValues);
975            var r = InterpretRec(node.GetSubtree(1), nodeValues);
976
977            return Tuple.Create(l.Item1 * r.Item1, l.Item2 * r.Item1 + l.Item1 * r.Item2);
978          }
979
980        case "-": {
981            var l = InterpretRec(node.GetSubtree(0), nodeValues);
982            var r = InterpretRec(node.GetSubtree(1), nodeValues);
983
984            return Tuple.Create(l.Item1 - r.Item1, l.Item2 - r.Item2);
985          }
986        case "%": {
987            var l = InterpretRec(node.GetSubtree(0), nodeValues);
988            var r = InterpretRec(node.GetSubtree(1), nodeValues);
989
990            // protected division
991            if (r.Item1.IsAlmost(0.0)) {
992              return Tuple.Create(0.0, Vector.Zero);
993            } else {
994              return Tuple.Create(
995                l.Item1 / r.Item1,
996                l.Item1 * -1.0 / (r.Item1 * r.Item1) * r.Item2 + 1.0 / r.Item1 * l.Item2 // (f/g)' = f * (1/g)' + 1/g * f' = f * -1/g² * g' + 1/g * f'
997                );
998            }
999          }
1000        case "sin": {
1001            var x = InterpretRec(node.GetSubtree(0), nodeValues);
1002            return Tuple.Create(
1003              Math.Sin(x.Item1),
1004              Vector.Cos(x.Item2) * x.Item2
1005            );
1006          }
1007        case "cos": {
1008            var x = InterpretRec(node.GetSubtree(0), nodeValues);
1009            return Tuple.Create(
1010              Math.Cos(x.Item1),
1011              -Vector.Sin(x.Item2) * x.Item2
1012            );
1013          }
1014        default: {
1015            return nodeValues[node];  // value and gradient for constants and variables must be set by the caller
1016          }
1017      }
1018    }
1019    #endregion
1020
1021    #region events
1022    /*
1023     * Dependencies between parameters:
1024     *
1025     * ProblemData
1026     *    |
1027     *    V
1028     * TargetVariables   FunctionSet    MaximumLength    NumberOfLatentVariables
1029     *               |   |                 |                   |
1030     *               V   V                 |                   |
1031     *             Grammar <---------------+-------------------
1032     *                |
1033     *                V
1034     *            Encoding
1035     */
1036    private void RegisterEventHandlers() {
1037      ProblemDataParameter.ValueChanged += ProblemDataParameter_ValueChanged;
1038      if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += ProblemData_Changed;
1039
1040      TargetVariablesParameter.ValueChanged += TargetVariablesParameter_ValueChanged;
1041      if (TargetVariablesParameter.Value != null) TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
1042
1043      FunctionSetParameter.ValueChanged += FunctionSetParameter_ValueChanged;
1044      if (FunctionSetParameter.Value != null) FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
1045
1046      MaximumLengthParameter.Value.ValueChanged += MaximumLengthChanged;
1047
1048      NumberOfLatentVariablesParameter.Value.ValueChanged += NumLatentVariablesChanged;
1049    }
1050
    // Event handlers: each parameter change funnels into UpdateGrammarAndEncoding()
    // (or UpdateTargetVariables() for ProblemData changes, which updates the rest implicitly).

    private void NumLatentVariablesChanged(object sender, EventArgs e) {
      UpdateGrammarAndEncoding();
    }

    private void MaximumLengthChanged(object sender, EventArgs e) {
      UpdateGrammarAndEncoding();
    }

    // re-attach the CheckedItemsChanged handler whenever the FunctionSet value object is replaced
    // NOTE(review): the handler on the previous value object is not removed — confirm the old
    // value is discarded so this cannot leak.
    private void FunctionSetParameter_ValueChanged(object sender, EventArgs e) {
      FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
    }

    private void CheckedFunctionsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
      UpdateGrammarAndEncoding();
    }

    // re-attach the CheckedItemsChanged handler whenever the TargetVariables value object is replaced
    private void TargetVariablesParameter_ValueChanged(object sender, EventArgs e) {
      TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
    }

    private void CheckedTargetVariablesChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
      UpdateGrammarAndEncoding();
    }

    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
      ProblemDataParameter.Value.Changed += ProblemData_Changed;
      OnProblemDataChanged();
      OnReset();
    }

    private void ProblemData_Changed(object sender, EventArgs e) {
      OnProblemDataChanged();
      OnReset();
    }

    private void OnProblemDataChanged() {
      UpdateTargetVariables();        // implicitly updates other dependent parameters
      // raise the ProblemDataChanged event (capture into a local for thread safety)
      var handler = ProblemDataChanged;
      if (handler != null) handler(this, EventArgs.Empty);
    }
1091
1092    #endregion
1093
1094    #region  helper
1095
    // (Re-)initializes all dependent parameters from the current ProblemData.
    private void InitAllParameters() {
      UpdateTargetVariables(); // implicitly updates the grammar and the encoding
    }
1099
1100    private ReadOnlyCheckedItemList<StringValue> CreateFunctionSet() {
1101      var l = new CheckedItemList<StringValue>();
1102      l.Add(new StringValue("+").AsReadOnly());
1103      l.Add(new StringValue("*").AsReadOnly());
1104      l.Add(new StringValue("%").AsReadOnly());
1105      l.Add(new StringValue("-").AsReadOnly());
1106      l.Add(new StringValue("sin").AsReadOnly());
1107      l.Add(new StringValue("cos").AsReadOnly());
1108      return l.AsReadOnly();
1109    }
1110
    // Numeric parameter symbols are named "θ<i>" (see CreateGrammar).
    private static bool IsConstantNode(ISymbolicExpressionTreeNode n) {
      return n.Symbol.Name.StartsWith("θ");
    }
    // Latent variable symbols are named "λ<i>" (see CreateGrammar).
    private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) {
      return n.Symbol.Name.StartsWith("λ");
    }
    // Any other terminal node refers to an input or target variable of the dataset.
    private static bool IsVariableNode(ISymbolicExpressionTreeNode n) {
      return (n.SubtreeCount == 0) && !IsConstantNode(n) && !IsLatentVariableNode(n);
    }
1120
1121
1122    private void UpdateTargetVariables() {
1123      var currentlySelectedVariables = TargetVariables.CheckedItems
1124        .OrderBy(i => i.Index)
1125        .Select(i => i.Value.Value)
1126        .ToArray();
1127
1128      var newVariablesList = new CheckedItemList<StringValue>(ProblemData.Dataset.VariableNames.Select(str => new StringValue(str).AsReadOnly()).ToArray()).AsReadOnly();
1129      var matchingItems = newVariablesList.Where(item => currentlySelectedVariables.Contains(item.Value)).ToArray();
1130      foreach (var matchingItem in matchingItems) {
1131        newVariablesList.SetItemCheckedState(matchingItem, true);
1132      }
1133      TargetVariablesParameter.Value = newVariablesList;
1134    }
1135
1136    private void UpdateGrammarAndEncoding() {
1137      var encoding = new MultiEncoding();
1138      var g = CreateGrammar();
1139      foreach (var targetVar in TargetVariables.CheckedItems) {
1140        encoding = encoding.Add(new SymbolicExpressionTreeEncoding(targetVar + "_tree", g, MaximumLength, MaximumLength)); // only limit by length
1141      }
1142      for (int i = 1; i <= NumberOfLatentVariables; i++) {
1143        encoding = encoding.Add(new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength));
1144      }
1145      Encoding = encoding;
1146    }
1147
1148    private ISymbolicExpressionGrammar CreateGrammar() {
1149      // whenever ProblemData is changed we create a new grammar with the necessary symbols
1150      var g = new SimpleSymbolicExpressionGrammar();
1151      g.AddSymbols(FunctionSet.CheckedItems.OrderBy(i => i.Index).Select(i => i.Value.Value).ToArray(), 2, 2);
1152
1153      // TODO
1154      //g.AddSymbols(new[] {
1155      //  "exp",
1156      //  "log", // log( <expr> ) // TODO: init a theta to ensure the value is always positive
1157      //  "exp_minus" // exp((-1) * <expr>
1158      //}, 1, 1);
1159
1160      foreach (var variableName in ProblemData.AllowedInputVariables.Union(TargetVariables.CheckedItems.Select(i => i.Value.Value)))
1161        g.AddTerminalSymbol(variableName);
1162
1163      // generate symbols for numeric parameters for which the value is optimized using AutoDiff
1164      // we generate multiple symbols to balance the probability for selecting a numeric parameter in the generation of random trees
1165      var numericConstantsFactor = 2.0;
1166      for (int i = 0; i < numericConstantsFactor * (ProblemData.AllowedInputVariables.Count() + TargetVariables.CheckedItems.Count()); i++) {
1167        g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff
1168      }
1169
1170      // generate symbols for latent variables
1171      for (int i = 1; i <= NumberOfLatentVariables; i++) {
1172        g.AddTerminalSymbol("λ" + i); // numeric parameter for which the value is optimized using AutoDiff
1173      }
1174
1175      return g;
1176    }
1177
1178
1179    private ISymbolicExpressionTreeNode TranslateTreeNode(ISymbolicExpressionTreeNode n, double[] parameterValues, ref int nextParIdx) {
1180      ISymbolicExpressionTreeNode translatedNode = null;
1181      if (n.Symbol is StartSymbol) {
1182        translatedNode = new StartSymbol().CreateTreeNode();
1183      } else if (n.Symbol is ProgramRootSymbol) {
1184        translatedNode = new ProgramRootSymbol().CreateTreeNode();
1185      } else if (n.Symbol.Name == "+") {
1186        translatedNode = new Addition().CreateTreeNode();
1187      } else if (n.Symbol.Name == "-") {
1188        translatedNode = new Subtraction().CreateTreeNode();
1189      } else if (n.Symbol.Name == "*") {
1190        translatedNode = new Multiplication().CreateTreeNode();
1191      } else if (n.Symbol.Name == "%") {
1192        translatedNode = new Division().CreateTreeNode();
1193      } else if (n.Symbol.Name == "sin") {
1194        translatedNode = new Sine().CreateTreeNode();
1195      } else if (n.Symbol.Name == "cos") {
1196        translatedNode = new Cosine().CreateTreeNode();
1197      } else if (IsConstantNode(n)) {
1198        var constNode = (ConstantTreeNode)new Constant().CreateTreeNode();
1199        constNode.Value = parameterValues[nextParIdx];
1200        nextParIdx++;
1201        translatedNode = constNode;
1202      } else {
1203        // assume a variable name
1204        var varName = n.Symbol.Name;
1205        var varNode = (VariableTreeNode)new Variable().CreateTreeNode();
1206        varNode.Weight = 1.0;
1207        varNode.VariableName = varName;
1208        translatedNode = varNode;
1209      }
1210      foreach (var child in n.Subtrees) {
1211        translatedNode.AddSubtree(TranslateTreeNode(child, parameterValues, ref nextParIdx));
1212      }
1213      return translatedNode;
1214    }
1215    #endregion
1216
1217    #region Import & Export
    // IProblemInstanceConsumer: adopts the given regression problem data as this problem's data.
    public void Load(IRegressionProblemData data) {
      Name = data.Name;
      Description = data.Description;
      ProblemData = data;
    }

    // IProblemInstanceExporter: returns the current problem data.
    public IRegressionProblemData Export() {
      return ProblemData;
    }
1227    #endregion
1228
1229  }
1230}
Note: See TracBrowser for help on using the repository browser.