Changeset 16603


Ignore:
Timestamp:
02/13/19 16:04:10 (2 months ago)
Author:
gkronber
Message:

#2925: scaling of residuals to target variance and updating constant values directly in the tree

Location:
branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/OdeParameterIdentification.cs

    r16602 r16603  
    189189      problem.Analyze(new[] { ind }, new[] { quality }, Results, rand);
    190190    }
    191 
    192     // private ISymbolicExpressionTree Convert(ISymbolicExpressionTree tree) {
    193     //   return new SymbolicExpressionTree(Convert(tree.Root));
    194     // }
    195 
    196 
    197     // for translation from symbolic expressions to simple symbols
    198     // private static Dictionary<Type, string> sym2str = new Dictionary<Type, string>() {
    199     //   {typeof(Addition), "+" },
    200     //   {typeof(Subtraction), "-" },
    201     //   {typeof(Multiplication), "*" },
    202     //   {typeof(Sine), "sin" },
    203     //   {typeof(Cosine), "cos" },
    204     //   {typeof(Square), "sqr" },
    205     // };
    206 
    207     // private ISymbolicExpressionTreeNode Convert(ISymbolicExpressionTreeNode node) {
    208     //   if (sym2str.ContainsKey(node.Symbol.GetType())) {
    209     //     var children = node.Subtrees.Select(st => Convert(st)).ToArray();
    210     //     return Make(sym2str[node.Symbol.GetType()], children);
    211     //   } else if (node.Symbol is ProgramRootSymbol) {
    212     //     var child = Convert(node.GetSubtree(0));
    213     //     node.RemoveSubtree(0);
    214     //     node.AddSubtree(child);
    215     //     return node;
    216     //   } else if (node.Symbol is StartSymbol) {
    217     //     var child = Convert(node.GetSubtree(0));
    218     //     node.RemoveSubtree(0);
    219     //     node.AddSubtree(child);
    220     //     return node;
    221     //   } else if (node.Symbol is Division) {
    222     //     var children = node.Subtrees.Select(st => Convert(st)).ToArray();
    223     //     if (children.Length == 1) {
    224     //       return Make("%", new[] { new SimpleSymbol("θ", 0).CreateTreeNode(), children[0] });
    225     //     } else if (children.Length != 2) throw new ArgumentException("Division is not supported for multiple arguments");
    226     //     else return Make("%", children);
    227     //   } else if (node.Symbol is Constant) {
    228     //     return new SimpleSymbol("θ", 0).CreateTreeNode();
    229     //   } else if (node.Symbol is DataAnalysis.Symbolic.Variable) {
    230     //     var varNode = node as VariableTreeNode;
    231     //     if (!varNode.Weight.IsAlmost(1.0)) throw new ArgumentException("Variable weights are not supported");
    232     //     return new SimpleSymbol(varNode.VariableName, 0).CreateTreeNode();
    233     //   } else throw new ArgumentException("Unsupported symbol: " + node.Symbol.Name);
    234     // }
    235 
    236     // private ISymbolicExpressionTreeNode Make(string op, ISymbolicExpressionTreeNode[] children) {
    237     //   if (children.Length == 1) {
    238     //     var s = new SimpleSymbol(op, 1).CreateTreeNode();
    239     //     s.AddSubtree(children.First());
    240     //     return s;
    241     //   } else {
    242     //     var s = new SimpleSymbol(op, 2).CreateTreeNode();
    243     //     var c0 = children[0];
    244     //     var c1 = children[1];
    245     //     s.AddSubtree(c0);
    246     //     s.AddSubtree(c1);
    247     //     for (int i = 2; i < children.Length; i++) {
    248     //       var sn = new SimpleSymbol(op, 2).CreateTreeNode();
    249     //       sn.AddSubtree(s);
    250     //       sn.AddSubtree(children[i]);
    251     //       s = sn;
    252     //     }
    253     //     return s;
    254     //   }
    255     // }
    256191    #endregion
    257192  }
  • branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs

    r16602 r16603  
    234234      string odeSolver) {
    235235
     236      // extract constants from tree
    236237      var constantNodes = trees.Select(t => t.IterateNodesPrefix().OfType<ConstantTreeNode>().ToArray()).ToArray();
    237238      var initialTheta = constantNodes.Select(nodes => nodes.Select(n => n.Value).ToArray()).ToArray();
     
    373374    public static void EvaluateObjectiveVector(double[] x, double[] fi, object optimizationData) { EvaluateObjectiveVector(x, fi, (OptimizationData)optimizationData); } // for alglib
    374375    public static void EvaluateObjectiveVector(double[] x, double[] fi, OptimizationData optimizationData) {
    375       var rows = optimizationData.rows.ToArray();
     376      var rows = optimizationData.rows;
    376377      var problemData = optimizationData.problemData;
    377378      var nodeValueLookup = optimizationData.nodeValueLookup;
     
    402403      var problemData = optimizationData.problemData;
    403404      var ds = problemData.Dataset;
    404       var rows = optimizationData.episodes.SelectMany(ep => Enumerable.Range(ep.Start, ep.Size)).ToArray();
     405      var rows = optimizationData.rows;
    405406
    406407      var nodeValueLookup = optimizationData.nodeValueLookup;
    407408      nodeValueLookup.UpdateParamValues(x);
    408409
    409       // TODO add integration of latent variables
    410410      int termIdx = 0;
    411411
     
    427427
    428428          var y = optimizationData.targetValues[i][trainIdx];
    429           fi[termIdx] = y - f;
    430           if (jac != null && g != Vector.Zero) for (int j = 0; j < g.Length; j++) jac[termIdx, j] = -g[j];
 429          fi[termIdx] = (y - f) * optimizationData.inverseStandardDeviation[i]; // scale for NMSE
     430          if (jac != null && g != Vector.Zero) for (int j = 0; j < g.Length; j++) jac[termIdx, j] = -g[j] * optimizationData.inverseStandardDeviation[i];
    431431
    432432          termIdx++;
     
    455455      int outputIdx = 0;
    456456
    457       var prediction = Integrate(optimizationData, x).ToArray();
     457      nodeValueLookup.UpdateParamValues(x);
     458
     459      var predictionEnumerator = Integrate(optimizationData).GetEnumerator();
    458460      var trees = optimizationData.trees;
    459461
     462      predictionEnumerator.MoveNext();
     463      var currentPrediction = predictionEnumerator.Current;
    460464      for (int trainIdx = 0; trainIdx < rows.Length; trainIdx++) {
    461         var pred_i = prediction[Math.Min(trainIdx, prediction.Length - 1)]; // if we stopped earlier in the integration then just use the last generated value
     465        var pred_i = currentPrediction;
    462466
    463467        for (int i = 0; i < trees.Length; i++) {
     
    466470          var f = pred_i[i].Item1;
    467471          var g = pred_i[i].Item2;
    468           fi[outputIdx] = y - f;
    469           if (jac != null && g != Vector.Zero) for (int j = 0; j < g.Length; j++) jac[outputIdx, j] = -g[j];
     472          fi[outputIdx] = (y - f) * optimizationData.inverseStandardDeviation[i];  // scale for normalized squared error
     473          if (jac != null && g != Vector.Zero) for (int j = 0; j < g.Length; j++) jac[outputIdx, j] = -g[j] * optimizationData.inverseStandardDeviation[i];
    470474
    471475          outputIdx++;
    472476        }
     477        if (predictionEnumerator.MoveNext()) currentPrediction = predictionEnumerator.Current; // if we stopped early => just use the last prediction until the end
    473478      }
    474479    }
     
    513518        foreach (var episode in TrainingEpisodes) {
    514519          var episodes = new[] { episode };
    515           var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta_" + eIdx]).ToArray(); // see evaluate
    516520          var optimizationData = new OptimizationData(trees, targetVars, problemData, null, episodes, NumericIntegrationSteps, latentVariables, OdeSolver);
    517           var trainingPrediction = Integrate(optimizationData, optTheta).ToArray();
     521          var trainingPrediction = Integrate(optimizationData).ToArray();
    518522          trainingPredictions.Add(trainingPrediction);
    519523          eIdx++;
     
    546550        results["Models"].Value = models;
    547551      } else {
    548         var optTheta = Problem.ExtractParametersFromTrees(trees);
    549552        var optimizationData = new OptimizationData(trees, targetVars, problemData, null, TrainingEpisodes.ToArray(), NumericIntegrationSteps, latentVariables, OdeSolver);
    550         var trainingPrediction = Integrate(optimizationData, optTheta).ToArray();
     553        var trainingPrediction = Integrate(optimizationData).ToArray();
     554
     555
     556        var numParams = optimizationData.nodeValueLookup.ParameterCount;
    551557        // for target values and latent variables
    552558        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
     
    561567            trainingDataTable.Rows.Add(predictedValuesRow);
    562568
    563             for (int paramIdx = 0; paramIdx < optTheta.Length; paramIdx++) {
     569            for (int paramIdx = 0; paramIdx < numParams; paramIdx++) {
    564570              var paramSensitivityRow = new DataRow($"∂{targetVar}/∂θ{paramIdx}", $"Sensitivities of parameter {paramIdx}", trainingPrediction.Select(arr => arr[colIdx].Item2[paramIdx]).ToArray());
    565571              paramSensitivityRow.VisualProperties.SecondYAxis = true;
     
    580586        var errorTable = new DataTable("Squared error and gradient");
    581587        var seRow = new DataRow("Squared error");
    582         var gradientRows = Enumerable.Range(0, optTheta.Length).Select(i => new DataRow($"∂SE/∂θ{i}")).ToArray();
     588        var gradientRows = Enumerable.Range(0, numParams).Select(i => new DataRow($"∂SE/∂θ{i}")).ToArray();
    583589        errorTable.Rows.Add(seRow);
    584590        foreach (var gRow in gradientRows) {
     
    592598          // calculate objective function gradient
    593599          double f_i = 0.0;
    594           Vector g_i = Vector.CreateNew(new double[optTheta.Length]);
     600          Vector g_i = Vector.CreateNew(new double[numParams]);
    595601          for (int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
    596602            var y_pred_f = y_pred[colIdx].Item1;
     
    612618        var testRows = ProblemData.TestIndices.ToArray();
    613619        var testOptimizationData = new OptimizationData(trees, targetVars, problemData, null, new IntRange[] { ProblemData.TestPartition }, NumericIntegrationSteps, latentVariables, OdeSolver);
    614         var testPrediction = Integrate(testOptimizationData, optTheta).ToArray();
     620        var testPrediction = Integrate(testOptimizationData).ToArray();
    615621
    616622        for (int colIdx = 0; colIdx < trees.Length; colIdx++) {
     
    644650        var models = new VariableCollection();    // to store target var names and original version of tree
    645651
    646         var optimizedTrees = new List<ISymbolicExpressionTree>();
    647         int nextParIdx = 0;
     652        var clonedTrees = new List<ISymbolicExpressionTree>();
    648653        for (int idx = 0; idx < trees.Length; idx++) {
    649           var tree = trees[idx];
    650           // optimizedTrees.Add(new SymbolicExpressionTree(FixParameters(tree.Root, optTheta.ToArray(), ref nextParIdx)));
    651           optimizedTrees.Add(tree);
     654          clonedTrees.Add((ISymbolicExpressionTree)trees[idx]);
    652655        }
    653656        var ds = problemData.Dataset;
    654         var newVarNames = Enumerable.Range(0, nextParIdx).Select(i => "c_" + i).ToArray();
    655         var allVarNames = ds.DoubleVariables.Concat(newVarNames);
    656         var newVarValues = Enumerable.Range(0, nextParIdx).Select(i => "c_" + i).ToArray();
    657         var allVarValues = ds.DoubleVariables.Select(varName => ds.GetDoubleValues(varName).ToList())
    658           .Concat(Enumerable.Range(0, nextParIdx).Select(i => Enumerable.Repeat(optTheta[i], ds.Rows).ToList()))
    659           .ToList();
    660         var newDs = new Dataset(allVarNames, allVarValues);
    661         var newProblemData = new RegressionProblemData(newDs, problemData.AllowedInputVariables.Concat(newVarValues).ToArray(), problemData.TargetVariable);
    662         results["Solution"].Value = new Solution(optimizedTrees.ToArray(),
     657        var newProblemData = new RegressionProblemData((IDataset)ds.Clone(), problemData.AllowedInputVariables, problemData.TargetVariable);
     658        results["Solution"].Value = new Solution(clonedTrees.ToArray(),
    663659                   // optTheta,
    664660                   newProblemData,
     
    670666
    671667
    672         nextParIdx = 0;
    673668        for (int idx = 0; idx < trees.Length; idx++) {
    674669          var varName = string.Empty;
     
    715710    // for a solver with the necessary features see: https://computation.llnl.gov/projects/sundials/cvodes
    716711
    717     public static IEnumerable<Tuple<double, Vector>[]> Integrate(OptimizationData optimizationData, double[] parameterValues) {
     712    public static IEnumerable<Tuple<double, Vector>[]> Integrate(OptimizationData optimizationData) {
    718713
    719714      var trees = optimizationData.trees;
     
    727722      var calculatedVariables = targetVariables.Concat(latentVariables).ToArray(); // TODO: must coincide with the order of trees in the encoding
    728723
    729 
    730724      var nodeValues = optimizationData.nodeValueLookup;
    731       nodeValues.UpdateParamValues(parameterValues);
    732 
    733725
    734726      // TODO: numericIntegrationSteps is only relevant for the HeuristicLab solver
     
    744736        }
    745737
    746         { // CODE BELOW MUST BE TESTED
    747           if (latentVariables.Length > 0) throw new NotImplementedException();
    748 
    749           // add value entries for latent variables which are also integrated
    750           // initial values are at the end of the parameter vector
    751           // separate initial values for each episode
    752           var initialValueIdx = parameterValues.Length - episodes.Count() * latentVariables.Length + episodeIdx * latentVariables.Length;
    753           foreach (var latentVar in latentVariables) {
    754             var arr = new double[parameterValues.Length]; // backing array
    755             arr[initialValueIdx] = 1.0;
    756             var g = new Vector(arr);
    757             nodeValues.SetVariableValue(latentVar, parameterValues[initialValueIdx], g); // we don't have observations for latent variables therefore we optimize the initial value for each episode
    758             initialValueIdx++;
    759           }
     738        { // CODE BELOW DOESN'T WORK ANYMORE
     739          // if (latentVariables.Length > 0) throw new NotImplementedException();
     740          //
     741          // // add value entries for latent variables which are also integrated
     742          // // initial values are at the end of the parameter vector
     743          // // separate initial values for each episode
     744          // var initialValueIdx = parameterValues.Length - episodes.Count() * latentVariables.Length + episodeIdx * latentVariables.Length;
     745          // foreach (var latentVar in latentVariables) {
     746          //   var arr = new double[parameterValues.Length]; // backing array
     747          //   arr[initialValueIdx] = 1.0;
     748          //   var g = new Vector(arr);
     749          //   nodeValues.SetVariableValue(latentVar, parameterValues[initialValueIdx], g); // we don't have observations for latent variables therefore we optimize the initial value for each episode
     750          //   initialValueIdx++;
     751          // }
    760752        }
    761753
     
    11291121    // TODO: use an existing interpreter implementation instead
    11301122    private static double InterpretRec(ISymbolicExpressionTreeNode node, NodeValueLookup nodeValues) {
    1131       if (node.Symbol is Constant || node.Symbol is Variable) {
     1123      if (node is ConstantTreeNode) {
     1124        return ((ConstantTreeNode)node).Value;
     1125      } else if(node is VariableTreeNode) {
    11321126        return nodeValues.NodeValue(node);
    11331127      } else if (node.Symbol is Addition) {
     
    13701364      var g = CreateGrammar();
    13711365      foreach (var targetVar in TargetVariables.CheckedItems) {
    1372         encoding = encoding.Add(new SymbolicExpressionTreeEncoding(targetVar + "_tree", g, MaximumLength, MaximumLength)); // only limit by length
     1366        var e = new SymbolicExpressionTreeEncoding(targetVar + "_tree", g, MaximumLength, MaximumLength);
     1367        var multiManipulator = e.Operators.Where(op => op is MultiSymbolicExpressionTreeManipulator).First();
     1368        var filteredOperators = e.Operators.Where(op => !(op is IManipulator)).ToArray();
     1369        // make sure our multi-manipulator is the only manipulator
     1370        e.Operators = new IOperator[] { multiManipulator }.Concat(filteredOperators);
     1371        encoding = encoding.Add(e); // only limit by length
    13731372      }
    13741373      for (int i = 1; i <= NumberOfLatentVariables; i++) {
    1375         encoding = encoding.Add(new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength));
     1374        var e = new SymbolicExpressionTreeEncoding("λ" + i + "_tree", g, MaximumLength, MaximumLength);
     1375        var multiManipulator = e.Operators.Where(op => op is MultiSymbolicExpressionTreeManipulator).First();
     1376        var filteredOperators = e.Operators.Where(op => !(op is IManipulator)).ToArray();
     1377        // make sure our multi-manipulator is the only manipulator
     1378        e.Operators = new IOperator[] { multiManipulator }.Concat(filteredOperators);
     1379        encoding = encoding.Add(e);
    13761380      }
    13771381      Encoding = encoding;
     
    14921496      public readonly IRegressionProblemData problemData;
    14931497      public readonly double[][] targetValues;
     1498      public readonly double[] inverseStandardDeviation;
    14941499      public readonly IntRange[] episodes;
    14951500      public readonly int numericIntegrationSteps;
     
    14971502      public readonly string odeSolver;
    14981503      public readonly NodeValueLookup nodeValueLookup;
    1499       public IEnumerable<int> rows => episodes.SelectMany(ep => Enumerable.Range(ep.Start, ep.Size));
     1504      public readonly int[] rows;
    15001505
    15011506      public OptimizationData(ISymbolicExpressionTree[] trees, string[] targetVars, IRegressionProblemData problemData,
     
    15081513        this.problemData = problemData;
    15091514        this.targetValues = targetValues;
     1515        if (targetValues != null)
     1516          this.inverseStandardDeviation = targetValues.Select(vec => 1.0 / vec.StandardDeviation()).ToArray();
     1517        else
     1518          this.inverseStandardDeviation = Enumerable.Repeat(1.0, trees.Length).ToArray();
    15101519        this.episodes = episodes;
    15111520        this.numericIntegrationSteps = numericIntegrationSteps;
     
    15131522        this.odeSolver = odeSolver;
    15141523        this.nodeValueLookup = new NodeValueLookup(trees);
     1524        this.rows =  episodes.SelectMany(ep => Enumerable.Range(ep.Start, ep.Size)).ToArray();
    15151525      }
    15161526    }
     
    15191529      private readonly Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>> node2val = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, Vector>>();
    15201530      private readonly Dictionary<string, List<ISymbolicExpressionTreeNode>> name2nodes = new Dictionary<string, List<ISymbolicExpressionTreeNode>>();
    1521       private readonly Dictionary<int, ISymbolicExpressionTreeNode> paramIdx2node = new Dictionary<int, ISymbolicExpressionTreeNode>();
     1531      private readonly ConstantTreeNode[] constantNodes;
     1532      private readonly Vector[] constantGradientVectors;
     1533
     1534      // private readonly Dictionary<int, ISymbolicExpressionTreeNode> paramIdx2node = new Dictionary<int, ISymbolicExpressionTreeNode>();
    15221535
    15231536      public double NodeValue(ISymbolicExpressionTreeNode node) => node2val[node].Item1;
     
    15251538
    15261539      public NodeValueLookup(ISymbolicExpressionTree[] trees) {
    1527         int paramIdx = 0;
    1528 
    1529         var constantNodes = trees.SelectMany(t => t.IterateNodesPrefix().Where(IsConstantNode)).ToArray();
    1530         foreach (var n in constantNodes) {
    1531           paramIdx2node[paramIdx] = n;
    1532           SetParamValue(paramIdx, GetConstantValue(n), Vector.CreateIndicator(length: constantNodes.Length, idx: paramIdx));
    1533           paramIdx++;
     1540
     1541        this.constantNodes = trees.SelectMany(t => t.IterateNodesPrefix().OfType<ConstantTreeNode>()).ToArray();
     1542        constantGradientVectors = new Vector[constantNodes.Length];
     1543        for(int paramIdx = 0; paramIdx < constantNodes.Length; paramIdx++) {
     1544          constantGradientVectors[paramIdx] = Vector.CreateIndicator(length: constantNodes.Length, idx: paramIdx);
     1545
     1546          var node = constantNodes[paramIdx];
     1547          node2val[node] = Tuple.Create(node.Value, constantGradientVectors[paramIdx]);
    15341548        }
    15351549
     
    15471561      }
    15481562
    1549       public int ParameterCount => paramIdx2node.Count;
    1550 
    1551 
    1552       public void SetParamValue(int paramIdx, double val, Vector dVal) {
    1553         node2val[paramIdx2node[paramIdx]] = Tuple.Create(val, dVal);
    1554       }
     1563      public int ParameterCount => constantNodes.Length;
    15551564
    15561565      public void SetVariableValue(string variableName, double val) {
     
    15731582
    15741583      internal void UpdateParamValues(double[] x) {
    1575         Trace.Assert(x.Length == paramIdx2node.Count);
    1576         for (int paramIdx = 0; paramIdx < x.Length; paramIdx++) {
    1577           var n = paramIdx2node[paramIdx];
    1578           node2val[n] = Tuple.Create(x[paramIdx], node2val[n].Item2); // prevent allocation of new Vector
     1584        for (int i = 0; i < x.Length; i++) {
     1585          constantNodes[i].Value = x[i];
     1586          node2val[constantNodes[i]] = Tuple.Create(x[i], constantGradientVectors[i]);
    15791587        }
    15801588      }
  • branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Solution.cs

    r16602 r16603  
    8888    public IEnumerable<double[]> Predict(IntRange episode, int forecastHorizon, out double snmse) {
    8989      var forecastEpisode = new IntRange(episode.Start, episode.End + forecastHorizon);
     90      //
     91      // var random = new FastRandom(12345);
     92      // snmse = Problem.OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, new[] { forecastEpisode }, 100, numericIntegrationSteps, odeSolver);
     93      var optimizationData = new Problem.OptimizationData(trees, targetVars, problemData, null, new[] { forecastEpisode }, numericIntegrationSteps, latentVariables, odeSolver);
     94      //
     95      //
     96      // var theta = Problem.ExtractParametersFromTrees(trees);
    9097
    91       var random = new FastRandom(12345);
    92       snmse = Problem.OptimizeForEpisodes(trees, problemData, targetVars, latentVariables, random, new[] { forecastEpisode }, 100, numericIntegrationSteps, odeSolver);
    93       var optimizationData = new Problem.OptimizationData(trees, targetVars, problemData, null, new[] { forecastEpisode }, numericIntegrationSteps, latentVariables, odeSolver);
    94 
    95 
    96       var theta = Problem.ExtractParametersFromTrees(trees);
    97 
    98       var predictions = Problem.Integrate(optimizationData, theta).ToArray();
    99       return predictions.Select(p => p.Select(pi => pi.Item1).ToArray()).ToArray();
     98      snmse = 0.0; // TODO
     99      return Problem.Integrate(optimizationData).Select(p => p.Select(pi => pi.Item1).ToArray()).ToArray();
    100100    }
    101101  }
Note: See TracChangeset for help on using the changeset viewer.