
source: branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs @ 15970

Last change on this file: r15970, checked in by gkronber, 6 years ago

#2925: added expressions for latent variables and allow parameterization of the number of integration steps

#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Collections;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.Instances;
namespace HeuristicLab.Problems.DynamicalSystemsModelling {
  // A lightweight vector type used to accumulate gradients w.r.t. the numeric parameters (forward-mode autodiff).
  public class Vector {
    public readonly static Vector Zero = new Vector(new double[0]);

    public static Vector operator +(Vector a, Vector b) {
      if (a == Zero) return b;
      if (b == Zero) return a;
      Debug.Assert(a.arr.Length == b.arr.Length);
      var res = new double[a.arr.Length];
      for (int i = 0; i < res.Length; i++)
        res[i] = a.arr[i] + b.arr[i];
      return new Vector(res);
    }
    public static Vector operator -(Vector a, Vector b) {
      if (b == Zero) return a;
      if (a == Zero) return -b;
      Debug.Assert(a.arr.Length == b.arr.Length);
      var res = new double[a.arr.Length];
      for (int i = 0; i < res.Length; i++)
        res[i] = a.arr[i] - b.arr[i];
      return new Vector(res);
    }
    public static Vector operator -(Vector v) {
      if (v == Zero) return Zero;
      // negate into a fresh array instead of mutating the operand
      var res = new double[v.arr.Length];
      for (int i = 0; i < res.Length; i++)
        res[i] = -v.arr[i];
      return new Vector(res);
    }

    public static Vector operator *(double s, Vector v) {
      if (v == Zero) return Zero;
      if (s == 0.0) return Zero;
      var res = new double[v.arr.Length];
      for (int i = 0; i < res.Length; i++)
        res[i] = s * v.arr[i];
      return new Vector(res);
    }
    public static Vector operator *(Vector v, double s) {
      return s * v;
    }
    public static Vector operator /(double s, Vector v) {
      if (s == 0.0) return Zero;
      if (v == Zero) throw new ArgumentException("Division by zero vector");
      var res = new double[v.arr.Length];
      for (int i = 0; i < res.Length; i++)
        res[i] = s / v.arr[i];
      return new Vector(res);
    }
    public static Vector operator /(Vector v, double s) {
      return v * (1.0 / s); // parenthesized to avoid calling this operator recursively
    }


    private readonly double[] arr; // backing array

    public Vector(double[] v) {
      this.arr = v;
    }

    public void CopyTo(double[] target) {
      Debug.Assert(arr.Length <= target.Length);
      Array.Copy(arr, target, arr.Length);
    }
  }
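
  // Usage sketch (illustrative only, not part of the class): Vector accumulates the
  // partial derivatives w.r.t. the numeric parameters θ while expressions are interpreted.
  // For two parameters θ0, θ1 the gradient of f = θ0 * θ1 at θ = (2, 3) is (3, 2):
  //
  //   var dTheta0 = new Vector(new double[] { 1.0, 0.0 }); // ∂θ0/∂θ (unit vector)
  //   var dTheta1 = new Vector(new double[] { 0.0, 1.0 }); // ∂θ1/∂θ (unit vector)
  //   var grad = 3.0 * dTheta0 + 2.0 * dTheta1;            // product rule: θ1·θ0' + θ0·θ1'
  //   var g = new double[2]; grad.CopyTo(g);               // g == { 3.0, 2.0 }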

  [Item("Dynamical Systems Modelling Problem", "TODO")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
  [StorableClass]
  public sealed class Problem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {

    #region parameter names
    private const string ProblemDataParameterName = "Data";
    private const string TargetVariablesParameterName = "Target variables";
    private const string FunctionSetParameterName = "Function set";
    private const string MaximumLengthParameterName = "Size limit";
    private const string MaximumParameterOptimizationIterationsParameterName = "Max. parameter optimization iterations";
    private const string NumberOfLatentVariablesParameterName = "Number of latent variables";
    private const string NumericIntegrationStepsParameterName = "Steps for numeric integration";
    #endregion

    #region Parameter Properties
    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }

    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemCollection<StringValue>> TargetVariablesParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemCollection<StringValue>>)Parameters[TargetVariablesParameterName]; }
    }
    public IValueParameter<ReadOnlyCheckedItemCollection<StringValue>> FunctionSetParameter {
      get { return (IValueParameter<ReadOnlyCheckedItemCollection<StringValue>>)Parameters[FunctionSetParameterName]; }
    }
    public IFixedValueParameter<IntValue> MaximumLengthParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumLengthParameterName]; }
    }
    public IFixedValueParameter<IntValue> MaximumParameterOptimizationIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaximumParameterOptimizationIterationsParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumberOfLatentVariablesParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfLatentVariablesParameterName]; }
    }
    public IFixedValueParameter<IntValue> NumericIntegrationStepsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[NumericIntegrationStepsParameterName]; }
    }
    #endregion

    #region Properties
    public IRegressionProblemData ProblemData {
      get { return ProblemDataParameter.Value; }
      set { ProblemDataParameter.Value = value; }
    }
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }

    public ReadOnlyCheckedItemCollection<StringValue> TargetVariables {
      get { return TargetVariablesParameter.Value; }
    }

    public ReadOnlyCheckedItemCollection<StringValue> FunctionSet {
      get { return FunctionSetParameter.Value; }
    }

    public int MaximumLength {
      get { return MaximumLengthParameter.Value.Value; }
    }
    public int MaximumParameterOptimizationIterations {
      get { return MaximumParameterOptimizationIterationsParameter.Value.Value; }
    }
    public int NumberOfLatentVariables {
      get { return NumberOfLatentVariablesParameter.Value.Value; }
    }
    public int NumericIntegrationSteps {
      get { return NumericIntegrationStepsParameter.Value.Value; }
    }

    #endregion

    public event EventHandler ProblemDataChanged;

    public override bool Maximization {
      get { return false; } // we minimize NMSE
    }

    #region item cloning and persistence
    // persistence
    [StorableConstructor]
    private Problem(bool deserializing) : base(deserializing) { }
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      RegisterEventHandlers();
    }

    // cloning
    private Problem(Problem original, Cloner cloner)
      : base(original, cloner) {
      RegisterEventHandlers();
    }
    public override IDeepCloneable Clone(Cloner cloner) { return new Problem(this, cloner); }
    #endregion

    public Problem()
      : base() {
      var targetVariables = new CheckedItemCollection<StringValue>().AsReadOnly(); // HACK: it would be better to provide a new class derived from IDataAnalysisProblem
      var functions = CreateFunctionSet();
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data captured from the dynamical system. Use the CSV import functionality to import data.", new RegressionProblemData()));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemCollection<StringValue>>(TargetVariablesParameterName, "Target variables (overrides the setting in the ProblemData)", targetVariables));
      Parameters.Add(new ValueParameter<ReadOnlyCheckedItemCollection<StringValue>>(FunctionSetParameterName, "The list of allowed functions", functions));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumLengthParameterName, "The maximum allowed length of each expression. Set to a small value (5 - 25). Default = 10", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaximumParameterOptimizationIterationsParameterName, "The maximum number of iterations for the optimization of parameters (using L-BFGS). More iterations make the algorithm slower; fewer iterations might prevent convergence of the optimization scheme. Default = 100", new IntValue(100)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumberOfLatentVariablesParameterName, "Latent variables (unobserved variables) allow us to produce expressions which are integrated up and can be used in other expressions. They are handled similarly to target variables in forward simulation / integration. The difference to target variables is that there are no data to which the calculated values of latent variables are compared. Set to a small value (0 - 5) as necessary. Default = 0", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "The number of steps of the numeric integration that are taken from one row to the next (set to 1 - 100). More steps make the algorithm slower; fewer steps worsen the accuracy of the numeric integration scheme. Default = 10", new IntValue(10)));

      RegisterEventHandlers();
      InitAllParameters();
    }
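
    // Minimal configuration sketch (illustrative only; myRegressionProblemData is a
    // hypothetical instance, e.g. obtained via CSV import):
    //
    //   var problem = new Problem();
    //   problem.ProblemData = myRegressionProblemData;
    //   problem.NumericIntegrationStepsParameter.Value.Value = 20;
    //   problem.NumberOfLatentVariablesParameter.Value.Value = 1;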

    public override double Evaluate(Individual individual, IRandom random) {
      var trees = individual.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from the individual

      var problemData = ProblemData;
      var rows = ProblemData.TrainingIndices.ToArray();
      var targetVars = TargetVariables.CheckedItems.Select(i => i.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
      var targetValues = new double[rows.Length, targetVars.Length];

      // collect the values of all target variables
      var colIdx = 0;
      foreach (var targetVar in targetVars) {
        int rowIdx = 0;
        foreach (var value in problemData.Dataset.GetDoubleValues(targetVar, rows)) {
          targetValues[rowIdx, colIdx] = value;
          rowIdx++;
        }
        colIdx++;
      }

      // assign a parameter index to each constant node (θ symbol) over all trees
      var nodeIdx = new Dictionary<ISymbolicExpressionTreeNode, int>();

      foreach (var tree in trees) {
        foreach (var node in tree.Root.IterateNodesPrefix().Where(n => IsConstantNode(n))) {
          nodeIdx.Add(node, nodeIdx.Count);
        }
      }

      var theta = nodeIdx.Select(_ => random.NextDouble() * 2.0 - 1.0).ToArray(); // init parameters randomly from Unif(-1,1)

      double[] optTheta = new double[0];
      if (theta.Length > 0) {
        alglib.minlbfgsstate state;
        alglib.minlbfgsreport report;
        alglib.minlbfgscreate(Math.Min(theta.Length, 5), theta, out state);
        alglib.minlbfgssetcond(state, 0.0, 0.0, 0.0, MaximumParameterOptimizationIterations);
        alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null,
          new object[] { trees, targetVars, problemData, nodeIdx, targetValues, rows, NumericIntegrationSteps, latentVariables }); //TODO: create a type
        alglib.minlbfgsresults(state, out optTheta, out report);

        /*
         *  L-BFGS algorithm results
         *
         *  INPUT PARAMETERS:
         *      State   -   algorithm state
         *
         *  OUTPUT PARAMETERS:
         *      X       -   array[0..N-1], solution
         *      Rep     -   optimization report:
         *                  * Rep.TerminationType completion code:
         *                      * -7    gradient verification failed.
         *                              See MinLBFGSSetGradientCheck() for more information.
         *                      * -2    rounding errors prevent further improvement.
         *                              X contains best point found.
         *                      * -1    incorrect parameters were specified
         *                      *  1    relative function improvement is no more than EpsF.
         *                      *  2    relative step is no more than EpsX.
         *                      *  4    gradient norm is no more than EpsG
         *                      *  5    MaxIts steps were taken
         *                      *  7    stopping conditions are too stringent,
         *                              further improvement is impossible
         *                  * Rep.IterationsCount contains the iteration count
         *                  * NFEV contains the number of function evaluations
         */
        if (report.terminationtype < 0) return double.MaxValue;
      }

      // evaluate again with the optimal theta to get the quality value
      double[] grad = new double[optTheta.Length];
      double optQuality = double.NaN;
      EvaluateObjectiveAndGradient(optTheta, ref optQuality, grad,
        new object[] { trees, targetVars, problemData, nodeIdx, targetValues, rows, NumericIntegrationSteps, latentVariables });
      if (double.IsNaN(optQuality) || double.IsInfinity(optQuality)) return 10E6; // return a large value (TODO: be consistent by using NMSE)

      individual["OptTheta"] = new DoubleArray(optTheta); // write back the optimized parameters so that we can use them in the Analyze method
      return optQuality;
    }
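
    // Note on the alglib calling convention (a sketch of what minlbfgsoptimize expects;
    // see the alglib documentation for the authoritative signatures): the objective
    // callback must match alglib's ndimensional_grad delegate,
    //
    //   void Callback(double[] x, ref double func, double[] grad, object obj)
    //
    // i.e. it receives the current parameter vector x, writes the objective value into
    // func, fills grad with ∂func/∂x, and gets the user payload passed as the last
    // argument of minlbfgsoptimize (here: the object[] bundle created above).
    // EvaluateObjectiveAndGradient below follows exactly this convention.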

    private static void EvaluateObjectiveAndGradient(double[] x, ref double f, double[] grad, object obj) {
      var trees = (ISymbolicExpressionTree[])((object[])obj)[0];
      var targetVariables = (string[])((object[])obj)[1];
      var problemData = (IRegressionProblemData)((object[])obj)[2];
      var nodeIdx = (Dictionary<ISymbolicExpressionTreeNode, int>)((object[])obj)[3];
      var targetValues = (double[,])((object[])obj)[4];
      var rows = (int[])((object[])obj)[5];
      var numericIntegrationSteps = (int)((object[])obj)[6];
      var latentVariables = (string[])((object[])obj)[7];

      var predicted = Integrate(
        trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
        problemData.Dataset,
        problemData.AllowedInputVariables.ToArray(),
        targetVariables,
        latentVariables,
        rows,                   // TODO: is it OK to use rows here?
        nodeIdx,
        x, numericIntegrationSteps).ToArray();

      // for the normalized MSE = 1/variance(t) * MSE(t, pred)
      var invVar = Enumerable.Range(0, targetVariables.Length)
        .Select(c => Enumerable.Range(0, targetValues.GetLength(0)).Select(row => targetValues[row, c])) // column vectors (targetValues is indexed 0..rows.Length-1, not by dataset row)
        .Select(vec => vec.Variance())
        .Select(v => 1.0 / v)
        .ToArray();

      // the objective function is the NMSE
      f = 0.0;
      int n = predicted.Length;
      double invN = 1.0 / n;
      var g = Vector.Zero;
      int r = 0;
      foreach (var y_pred in predicted) {
        // the 1/variance weighting puts the same weight on each target regardless of its value range
        for (int c = 0; c < y_pred.Length; c++) {

          var y_pred_f = y_pred[c].Item1;
          var y = targetValues[r, c];

          var res = (y - y_pred_f);
          var ressq = res * res;
          f += ressq * invN * invVar[c];
          g += -2.0 * res * y_pred[c].Item2 * invN * invVar[c];
        }
        r++;
      }

      g.CopyTo(grad);
    }
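
    // In formulas: with residuals e_tc = y_tc - ŷ_tc(θ) over the n predicted rows t and
    // the targets c with sample variance Var_c, the loop above computes
    //
    //   f(θ)  = (1/n) · Σ_c (1/Var_c) · Σ_t e_tc²
    //   ∇f(θ) = (1/n) · Σ_c (1/Var_c) · Σ_t (-2 · e_tc · ∇ŷ_tc(θ))
    //
    // where ∇ŷ_tc(θ) is the gradient Vector propagated through Integrate/InterpretRec.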

    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
      base.Analyze(individuals, qualities, results, random);

      if (!results.ContainsKey("Prediction (training)")) {
        results.Add(new Result("Prediction (training)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Prediction (test)")) {
        results.Add(new Result("Prediction (test)", typeof(ReadOnlyItemList<DataTable>)));
      }
      if (!results.ContainsKey("Models")) {
        results.Add(new Result("Models", typeof(ReadOnlyItemList<ISymbolicExpressionTree>)));
      }

      // TODO: extract common functionality from Evaluate and Analyze
      var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
      var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta"]).ToArray(); // see Evaluate
      var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from the individual
      var nodeIdx = new Dictionary<ISymbolicExpressionTreeNode, int>();

      foreach (var tree in trees) {
        foreach (var node in tree.Root.IterateNodesPrefix().Where(n => IsConstantNode(n))) {
          nodeIdx.Add(node, nodeIdx.Count);
        }
      }
      var problemData = ProblemData;
      var targetVars = TargetVariables.CheckedItems.Select(i => i.Value).ToArray();
      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees

      var trainingList = new ItemList<DataTable>();
      var trainingRows = ProblemData.TrainingIndices.ToArray();
      var trainingPrediction = Integrate(
       trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
       problemData.Dataset,
       problemData.AllowedInputVariables.ToArray(),
       targetVars,
       latentVariables,
       trainingRows,
       nodeIdx,
       optTheta,
       NumericIntegrationSteps).ToArray();

      for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
        var targetVar = targetVars[colIdx];
        var trainingDataTable = new DataTable(targetVar + " prediction (training)");
        var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
        var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray());
        trainingDataTable.Rows.Add(actualValuesRow);
        trainingDataTable.Rows.Add(predictedValuesRow);
        trainingList.Add(trainingDataTable);
      }

      // TODO: DRY for training and test
      var testList = new ItemList<DataTable>();
      var testRows = ProblemData.TestIndices.ToArray();
      var testPrediction = Integrate(
       trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
       problemData.Dataset,
       problemData.AllowedInputVariables.ToArray(),
       targetVars,
       latentVariables,
       testRows,
       nodeIdx,
       optTheta,
       NumericIntegrationSteps).ToArray();

      for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
        var targetVar = targetVars[colIdx];
        var testDataTable = new DataTable(targetVar + " prediction (test)");
        var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
        var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
        testDataTable.Rows.Add(actualValuesRow);
        testDataTable.Rows.Add(predictedValuesRow);
        testList.Add(testDataTable);
      }

      results["Prediction (training)"].Value = trainingList.AsReadOnly();
      results["Prediction (test)"].Value = testList.AsReadOnly();
      results["Models"].Value = new ItemList<ISymbolicExpressionTree>(trees).AsReadOnly();
    }

    #region interpretation
    private static IEnumerable<Tuple<double, Vector>[]> Integrate(
      ISymbolicExpressionTree[] trees, IDataset dataset, string[] inputVariables, string[] targetVariables, string[] latentVariables, IEnumerable<int> rows,
      Dictionary<ISymbolicExpressionTreeNode, int> nodeIdx, double[] parameterValues, int numericIntegrationSteps = 100) {

      int NUM_STEPS = numericIntegrationSteps;
      double h = 1.0 / NUM_STEPS;

      // return the first value as stored in the dataset
      yield return targetVariables
        .Select(targetVar => Tuple.Create(dataset.GetDoubleValue(targetVar, rows.First()), Vector.Zero))
        .ToArray();

      // integrate forward starting with the known values for the targets at t0

      var variableValues = new Dictionary<string, Tuple<double, Vector>>();
      var t0 = rows.First();
      foreach (var varName in inputVariables) {
        variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
      }
      foreach (var varName in targetVariables) {
        variableValues.Add(varName, Tuple.Create(dataset.GetDoubleValue(varName, t0), Vector.Zero));
      }
      // add value entries for latent variables which are also integrated
      foreach (var latentVar in latentVariables) {
        variableValues.Add(latentVar, Tuple.Create(0.0, Vector.Zero)); // we don't have observations for latent variables -> assume zero as the starting value
      }
      var calculatedVariables = targetVariables.Concat(latentVariables); // TODO: must coincide with the order of trees in the encoding

      foreach (var t in rows.Skip(1)) {
        for (int step = 0; step < NUM_STEPS; step++) {
          var deltaValues = new Dictionary<string, Tuple<double, Vector>>();
          foreach (var tup in trees.Zip(calculatedVariables, Tuple.Create)) {
            var tree = tup.Item1;
            var targetVarName = tup.Item2;
            // skip programRoot and startSymbol
            var res = InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), variableValues, nodeIdx, parameterValues);
            deltaValues.Add(targetVarName, res);
          }

          // update variableValues for the next step
          foreach (var kvp in deltaValues) {
            var oldVal = variableValues[kvp.Key];
            variableValues[kvp.Key] = Tuple.Create(
              oldVal.Item1 + h * kvp.Value.Item1,
              oldVal.Item2 + h * kvp.Value.Item2
            );
          }
        }

        // only return the target variables for the calculation of errors
        yield return targetVariables
          .Select(targetVar => variableValues[targetVar])
          .ToArray();

        // update for the next time step
        foreach (var varName in inputVariables) {
          variableValues[varName] = Tuple.Create(dataset.GetDoubleValue(varName, t), Vector.Zero);
        }
      }
    }
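
    // Integrate performs explicit (forward) Euler steps: with step size h = 1/NUM_STEPS
    // each state variable is advanced as y(t + h) = y(t) + h · f(y(t), θ), and the
    // gradient dy/dθ is advanced with the same rule. A minimal standalone sketch of
    // the scheme (hypothetical example, not part of this class):
    //
    //   // integrate dy/dt = -y from y(0) = 1 over one unit of time
    //   double y = 1.0, h = 1.0 / 10;
    //   for (int step = 0; step < 10; step++) y = y + h * (-y);
    //   // y ≈ 0.349 (exact: e⁻¹ ≈ 0.368); more steps reduce the error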

    private static Tuple<double, Vector> InterpretRec(
      ISymbolicExpressionTreeNode node,
      Dictionary<string, Tuple<double, Vector>> variableValues,
      Dictionary<ISymbolicExpressionTreeNode, int> nodeIdx,
      double[] parameterValues) {

      switch (node.Symbol.Name) {
        case "+": {
            var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues); // TODO: capture all parameters in a state type for interpretation
            var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);

            return Tuple.Create(l.Item1 + r.Item1, l.Item2 + r.Item2);
          }
        case "*": {
            var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues);
            var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);

            // product rule: (f·g)' = f'·g + f·g'
            return Tuple.Create(l.Item1 * r.Item1, l.Item2 * r.Item1 + l.Item1 * r.Item2);
          }

        case "-": {
            var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues);
            var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);

            return Tuple.Create(l.Item1 - r.Item1, l.Item2 - r.Item2);
          }
        case "%": {
            var l = InterpretRec(node.GetSubtree(0), variableValues, nodeIdx, parameterValues);
            var r = InterpretRec(node.GetSubtree(1), variableValues, nodeIdx, parameterValues);

            // protected division
            if (r.Item1.IsAlmost(0.0)) {
              return Tuple.Create(0.0, Vector.Zero);
            } else {
              return Tuple.Create(
                l.Item1 / r.Item1,
                l.Item1 * -1.0 / (r.Item1 * r.Item1) * r.Item2 + 1.0 / r.Item1 * l.Item2 // (f/g)' = f * (1/g)' + 1/g * f' = f * -1/g² * g' + 1/g * f'
                );
            }
          }
        default: {
            // distinguish the remaining cases
            if (IsConstantNode(node)) {
              // a numeric parameter θi: value from parameterValues, gradient is the i-th unit vector
              var vArr = new double[parameterValues.Length]; // backing array for the gradient vector
              vArr[nodeIdx[node]] = 1.0;
              var g = new Vector(vArr);
              return Tuple.Create(parameterValues[nodeIdx[node]], g);
            } else {
              // assume a variable name
              var varName = node.Symbol.Name;
              return variableValues[varName];
            }
          }
      }
    }
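
    // InterpretRec implements forward-mode automatic differentiation: every
    // subexpression evaluates to a dual value Tuple<double, Vector> holding the
    // function value and its gradient w.r.t. all θ parameters at once. For example
    // (values assumed for illustration), interpreting the tree for θ0 * x with
    // parameterValues = { 2.0 } and variableValues["x"] = (3.0, Zero) returns
    // (6.0, [3.0]): the value 2·3 and the derivative ∂(θ0·x)/∂θ0 = x = 3.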
    #endregion

    #region events
    /*
     * Dependencies between parameters:
     *
     * ProblemData
     *    |
     *    V
     * TargetVariables   FunctionSet    MaximumLength    NumberOfLatentVariables
     *               |   |                 |                   |
     *               V   V                 |                   |
     *             Grammar <---------------+-------------------+
     *                |
     *                V
     *            Encoding
     */
    private void RegisterEventHandlers() {
      ProblemDataParameter.ValueChanged += ProblemDataParameter_ValueChanged;
      if (ProblemDataParameter.Value != null) ProblemDataParameter.Value.Changed += ProblemData_Changed;

      TargetVariablesParameter.ValueChanged += TargetVariablesParameter_ValueChanged;
      if (TargetVariablesParameter.Value != null) TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;

      FunctionSetParameter.ValueChanged += FunctionSetParameter_ValueChanged;
      if (FunctionSetParameter.Value != null) FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;

      MaximumLengthParameter.Value.ValueChanged += MaximumLengthChanged;

      NumberOfLatentVariablesParameter.Value.ValueChanged += NumLatentVariablesChanged;
    }

    private void NumLatentVariablesChanged(object sender, EventArgs e) {
      UpdateGrammarAndEncoding();
    }

    private void MaximumLengthChanged(object sender, EventArgs e) {
      UpdateGrammarAndEncoding();
    }

    private void FunctionSetParameter_ValueChanged(object sender, EventArgs e) {
      FunctionSetParameter.Value.CheckedItemsChanged += CheckedFunctionsChanged;
      UpdateGrammarAndEncoding(); // the collection was replaced -> the grammar must be rebuilt as well
    }

    private void CheckedFunctionsChanged(object sender, CollectionItemsChangedEventArgs<StringValue> e) {
      UpdateGrammarAndEncoding();
    }

    private void TargetVariablesParameter_ValueChanged(object sender, EventArgs e) {
      TargetVariablesParameter.Value.CheckedItemsChanged += CheckedTargetVariablesChanged;
      UpdateGrammarAndEncoding(); // the collection was replaced -> the grammar must be rebuilt as well
    }

    private void CheckedTargetVariablesChanged(object sender, CollectionItemsChangedEventArgs<StringValue> e) {
      UpdateGrammarAndEncoding();
    }

    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
      ProblemDataParameter.Value.Changed += ProblemData_Changed;
      OnProblemDataChanged();
      OnReset();
    }

    private void ProblemData_Changed(object sender, EventArgs e) {
      OnProblemDataChanged();
      OnReset();
    }

    private void OnProblemDataChanged() {
      UpdateTargetVariables();        // implicitly updates the other dependent parameters

      var handler = ProblemDataChanged;
      if (handler != null) handler(this, EventArgs.Empty);
    }

    #endregion

    #region helper

    private void InitAllParameters() {
      UpdateTargetVariables(); // implicitly updates the grammar and the encoding
    }

    private ReadOnlyCheckedItemCollection<StringValue> CreateFunctionSet() {
      var l = new CheckedItemCollection<StringValue>();
      l.Add(new StringValue("+").AsReadOnly());
      l.Add(new StringValue("*").AsReadOnly());
      l.Add(new StringValue("%").AsReadOnly());
      l.Add(new StringValue("-").AsReadOnly());
      return l.AsReadOnly();
    }

    private static bool IsConstantNode(ISymbolicExpressionTreeNode n) {
      return n.Symbol.Name.StartsWith("θ");
    }
    private static bool IsLatentVariableNode(ISymbolicExpressionTreeNode n) {
      return n.Symbol.Name.StartsWith("λ");
    }


    private void UpdateTargetVariables() {
      var currentlySelectedVariables = TargetVariables.CheckedItems.Select(i => i.Value).ToArray();

      var newVariablesList = new CheckedItemCollection<StringValue>(ProblemData.Dataset.VariableNames.Select(str => new StringValue(str).AsReadOnly()).ToArray()).AsReadOnly();
      var matchingItems = newVariablesList.Where(item => currentlySelectedVariables.Contains(item.Value)).ToArray();
      foreach (var matchingItem in matchingItems) {
        newVariablesList.SetItemCheckedState(matchingItem, true);
      }
      TargetVariablesParameter.Value = newVariablesList;
    }

    private void UpdateGrammarAndEncoding() {
      var encoding = new MultiEncoding();
      var g = CreateGrammar();
      foreach (var targetVar in TargetVariables.CheckedItems) {
        encoding = encoding.Add(new SymbolicExpressionTreeEncoding(targetVar + "_tree", g, MaximumLength, MaximumLength)); // only limited by length
      }
      for (int i = 1; i <= NumberOfLatentVariables; i++) {
        encoding = encoding.Add(new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength));
      }
      Encoding = encoding;
    }

    private ISymbolicExpressionGrammar CreateGrammar() {
      // whenever ProblemData is changed we create a new grammar with the necessary symbols
      var g = new SimpleSymbolicExpressionGrammar();
      g.AddSymbols(FunctionSet.CheckedItems.Select(i => i.Value).ToArray(), 2, 2);

      // TODO
      //g.AddSymbols(new[] {
      //  "exp",
      //  "log",      // log( <expr> ) // TODO: init a theta to ensure the value is always positive
      //  "exp_minus" // exp((-1) * <expr>)
      //}, 1, 1);

      foreach (var variableName in ProblemData.AllowedInputVariables.Union(TargetVariables.CheckedItems.Select(i => i.Value)))
        g.AddTerminalSymbol(variableName);

      // generate symbols for the numeric parameters for which the values are optimized using AutoDiff
      // we generate multiple symbols to balance the probability of selecting a numeric parameter in the generation of random trees
      var numericConstantsFactor = 2.0;
      for (int i = 0; i < numericConstantsFactor * (ProblemData.AllowedInputVariables.Count() + TargetVariables.CheckedItems.Count()); i++) {
        g.AddTerminalSymbol("θ" + i); // numeric parameter for which the value is optimized using AutoDiff
      }

      // generate symbols for the latent variables
      for (int i = 1; i <= NumberOfLatentVariables; i++) {
        g.AddTerminalSymbol("λ" + i); // latent variable, handled like an additional target variable during integration
      }

      return g;
    }

    #endregion

    #region Import & Export
    public void Load(IRegressionProblemData data) {
      Name = data.Name;
      Description = data.Description;
      ProblemData = data;
    }

    public IRegressionProblemData Export() {
      return ProblemData;
    }
    #endregion

  }
}