Changeset 16155


Ignore:
Timestamp:
09/18/18 11:28:37 (13 months ago)
Author:
gkronber
Message:

#2925: allow tuning individual parameter vectors for episodes (using the same structure)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2925_AutoDiffForDynamicalModels/HeuristicLab.Problems.DynamicalSystemsModelling/3.3/Problem.cs

    r16154 r16155  
    117117    private const string NumericIntegrationStepsParameterName = "Steps for numeric integration";
    118118    private const string TrainingEpisodesParameterName = "Training episodes";
     119    private const string OptimizeParametersForEpisodesParameterName = "Optimize parameters for episodes";
    119120    #endregion
    120121
     
    145146    public IValueParameter<ItemList<IntRange>> TrainingEpisodesParameter {
    146147      get { return (IValueParameter<ItemList<IntRange>>)Parameters[TrainingEpisodesParameterName]; }
     148    }
     149    public IFixedValueParameter<BoolValue> OptimizeParametersForEpisodesParameter {
     150      get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersForEpisodesParameterName]; }
    147151    }
    148152    #endregion
     
    177181    public IEnumerable<IntRange> TrainingEpisodes {
    178182      get { return TrainingEpisodesParameter.Value; }
     183    }
     184    public bool OptimizeParametersForEpisodes {
     185      get { return OptimizeParametersForEpisodesParameter.Value.Value; }
    179186    }
    180187
     
    193200    [StorableHook(HookType.AfterDeserialization)]
    194201    private void AfterDeserialization() {
     202      if(!Parameters.ContainsKey(OptimizeParametersForEpisodesParameterName)) {
     203        Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
     204      }
    195205      RegisterEventHandlers();
    196206    }
     
    216226      Parameters.Add(new FixedValueParameter<IntValue>(NumericIntegrationStepsParameterName, "Number of steps in the numeric integration that are taken from one row to the next (set to 1 to 100). More steps makes the algorithm slower, less steps worsens the accuracy of the numeric integration scheme.", new IntValue(10)));
    217227      Parameters.Add(new ValueParameter<ItemList<IntRange>>(TrainingEpisodesParameterName, "A list of ranges that should be used for training, each range represents an independent episode. This overrides the TrainingSet parameter in ProblemData.", new ItemList<IntRange>()));
    218 
     228      Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersForEpisodesParameterName, "Flag to select if parameters should be optimized globally or for each episode individually.", new BoolValue(false)));
    219229      RegisterEventHandlers();
    220230      InitAllParameters();
     
    225235    }
    226236
    227 
    228237    public override double Evaluate(Individual individual, IRandom random) {
    229238      var trees = individual.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual
    230239
     240      if(OptimizeParametersForEpisodes) {
     241        int eIdx = 0;
     242        double totalNMSE = 0.0;
     243        int totalSize = 0;
     244        foreach(var episode in TrainingEpisodes) {
     245          double[] optTheta;
     246          double nmse;
     247          OptimizeForEpisodes(trees, random, new[] { episode }, out optTheta, out nmse);
     248          individual["OptTheta_" + eIdx] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
     249          eIdx++;
     250          totalNMSE += nmse * episode.Size;
     251          totalSize += episode.Size;
     252        }
     253        return totalNMSE / totalSize;
     254      } else {
     255        double[] optTheta;
     256        double nmse;
     257        OptimizeForEpisodes(trees, random, TrainingEpisodes, out optTheta, out nmse);
     258        individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
     259        return nmse;
     260      }
     261    }
     262
     263    private void OptimizeForEpisodes(ISymbolicExpressionTree[] trees, IRandom random, IEnumerable<IntRange> episodes, out double[] optTheta, out double nmse) {
     264      var rows = episodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start)).ToArray();
    231265      var problemData = ProblemData;
    232       var rows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start)).ToArray();
    233266      var targetVars = TargetVariables.CheckedItems.Select(i => i.Value).ToArray();
    234267      var latentVariables = Enumerable.Range(1, NumberOfLatentVariables).Select(i => "λ" + i).ToArray(); // TODO: must coincide with the variables which are actually defined in the grammar and also for which we actually have trees
     
    256289      var theta = nodeIdx.Select(_ => random.NextDouble() * 2.0 - 1.0).ToArray(); // init params randomly from Unif(-1,1)
    257290
    258       double[] optTheta = new double[0];
     291      optTheta = new double[0];
    259292      if(theta.Length > 0) {
    260293        alglib.minlbfgsstate state;
     
    263296        alglib.minlbfgssetcond(state, 0.0, 0.0, 0.0, MaximumParameterOptimizationIterations);
    264297        alglib.minlbfgsoptimize(state, EvaluateObjectiveAndGradient, null,
    265           new object[] { trees, targetVars, problemData, nodeIdx, targetValues, TrainingEpisodes.ToArray(), NumericIntegrationSteps, latentVariables }); //TODO: create a type
     298          new object[] { trees, targetVars, problemData, nodeIdx, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables }); //TODO: create a type
    266299        alglib.minlbfgsresults(state, out optTheta, out report);
    267300
     
    292325                          * NFEV contains number of function calculations
    293326         */
    294         if(report.terminationtype < 0) return double.MaxValue;
     327        if(report.terminationtype < 0) { nmse = 10E6; return; }
    295328      }
    296329
    297330      // perform evaluation for optimal theta to get quality value
    298331      double[] grad = new double[optTheta.Length];
    299       double optQuality = double.NaN;
    300       EvaluateObjectiveAndGradient(optTheta, ref optQuality, grad,
    301         new object[] { trees, targetVars, problemData, nodeIdx, targetValues, TrainingEpisodes.ToArray(), NumericIntegrationSteps, latentVariables });
    302       if(double.IsNaN(optQuality) || double.IsInfinity(optQuality)) return 10E6; // return a large value (TODO: be consistent by using NMSE)
    303 
    304       individual["OptTheta"] = new DoubleArray(optTheta); // write back optimized parameters so that we can use them in the Analysis method
    305       return optQuality;
     332      nmse = double.NaN;
     333      EvaluateObjectiveAndGradient(optTheta, ref nmse, grad,
     334        new object[] { trees, targetVars, problemData, nodeIdx, targetValues, episodes.ToArray(), NumericIntegrationSteps, latentVariables });
     335      if(double.IsNaN(nmse) || double.IsInfinity(nmse)) { nmse = 10E6; return; } // return a large value (TODO: be consistent by using NMSE)
    306336    }
    307337
     
    371401      }
    372402
     403      var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
     404      var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual
     405
    373406      // TODO extract common functionality from Evaluate and Analyze
    374       var bestIndividualAndQuality = this.GetBestIndividual(individuals, qualities);
    375       var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta"]).ToArray(); // see evaluate
    376       var trees = bestIndividualAndQuality.Item1.Values.Select(v => v.Value).OfType<ISymbolicExpressionTree>().ToArray(); // extract all trees from individual
    377407      var nodeIdx = new Dictionary<ISymbolicExpressionTreeNode, int>();
    378 
    379 
    380408      foreach(var tree in trees) {
    381409        foreach(var node in tree.Root.IterateNodesPrefix().Where(n => IsConstantNode(n))) {
     
    388416
    389417      var trainingList = new ItemList<DataTable>();
    390       var trainingPrediction = Integrate(
    391        trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
    392        problemData.Dataset,
    393        problemData.AllowedInputVariables.ToArray(),
    394        targetVars,
    395        latentVariables,
    396        TrainingEpisodes,
    397        nodeIdx,
    398        optTheta,
    399        NumericIntegrationSteps).ToArray();
    400 
    401       // only for actual target values
    402       var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
    403       for(int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
    404         var targetVar = targetVars[colIdx];
    405         var trainingDataTable = new DataTable(targetVar + " prediction (training)");
    406         var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
    407         var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray());
    408         trainingDataTable.Rows.Add(actualValuesRow);
    409         trainingDataTable.Rows.Add(predictedValuesRow);
    410         trainingList.Add(trainingDataTable);
    411       }
    412 
    413       // TODO: DRY for training and test
    414       var testList = new ItemList<DataTable>();
    415       var testRows = ProblemData.TestIndices.ToArray();
    416       var testPrediction = Integrate(
    417        trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
    418        problemData.Dataset,
    419        problemData.AllowedInputVariables.ToArray(),
    420        targetVars,
    421        latentVariables,
    422        new IntRange[] { ProblemData.TestPartition },
    423        nodeIdx,
    424        optTheta,
    425        NumericIntegrationSteps).ToArray();
    426 
    427       for(int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
    428         var targetVar = targetVars[colIdx];
    429         var testDataTable = new DataTable(targetVar + " prediction (test)");
    430         var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
    431         var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
    432         testDataTable.Rows.Add(actualValuesRow);
    433         testDataTable.Rows.Add(predictedValuesRow);
    434         testList.Add(testDataTable);
    435       }
    436 
    437       results["Prediction (training)"].Value = trainingList.AsReadOnly();
    438       results["Prediction (test)"].Value = testList.AsReadOnly();
    439 
    440       #region simplification of models
    441       // TODO the dependency of HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal
    442       var models = new VariableCollection();    // to store target var names and original version of tree
    443 
    444       foreach(var tup in targetVars.Zip(trees, Tuple.Create)) {
    445         var targetVarName = tup.Item1;
    446         var tree = tup.Item2;
    447 
    448         // when we reference HeuristicLab.Problems.DataAnalysis.Symbolic we can translate symbols
    449         int nextParIdx = 0;
    450         var shownTree = new SymbolicExpressionTree(TranslateTreeNode(tree.Root, optTheta, ref nextParIdx));
    451 
    452         // var shownTree = (SymbolicExpressionTree)tree.Clone();
    453         // var constantsNodeOrig = tree.IterateNodesPrefix().Where(IsConstantNode);
    454         // var constantsNodeShown = shownTree.IterateNodesPrefix().Where(IsConstantNode);
    455         //
    456         // foreach (var n in constantsNodeOrig.Zip(constantsNodeShown, (original, shown) => new { original, shown })) {
    457         //   double constantsVal = optTheta[nodeIdx[n.original]];
    458         //
    459         //   ConstantTreeNode replacementNode = new ConstantTreeNode(new Constant()) { Value = constantsVal };
    460         //
    461         //   var parentNode = n.shown.Parent;
    462         //   int replacementIndex = parentNode.IndexOfSubtree(n.shown);
    463         //   parentNode.RemoveSubtree(replacementIndex);
    464         //   parentNode.InsertSubtree(replacementIndex, replacementNode);
    465         // }
    466 
    467         var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
    468         origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
    469         models.Add(origTreeVar);
    470         var simplifiedTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(simplified)");
    471         simplifiedTreeVar.Value = TreeSimplifier.Simplify(shownTree);
    472         models.Add(simplifiedTreeVar);
    473 
    474       }
    475       results["Models"].Value = models;
    476       #endregion
     418
     419      if(OptimizeParametersForEpisodes) {
     420        var eIdx = 0;
     421        var trainingPredictions = new List<Tuple<double, Vector>[][]>();
     422        foreach(var episode in TrainingEpisodes) {
     423          var episodes = new[] { episode };
     424          var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta_" + eIdx]).ToArray(); // see evaluate
     425          var trainingPrediction = Integrate(
     426                                   trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
     427                                   problemData.Dataset,
     428                                   problemData.AllowedInputVariables.ToArray(),
     429                                   targetVars,
     430                                   latentVariables,
     431                                   episodes,
     432                                   nodeIdx,
     433                                   optTheta,
     434                                   NumericIntegrationSteps).ToArray();
     435          trainingPredictions.Add(trainingPrediction);
     436          eIdx++;
     437        }
     438
     439        // only for actual target values
     440        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
     441        for(int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
     442          var targetVar = targetVars[colIdx];
     443          var trainingDataTable = new DataTable(targetVar + " prediction (training)");
     444          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
     445          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPredictions.SelectMany(arr => arr.Select(row => row[colIdx].Item1)).ToArray());
     446          trainingDataTable.Rows.Add(actualValuesRow);
     447          trainingDataTable.Rows.Add(predictedValuesRow);
     448          trainingList.Add(trainingDataTable);
     449        }
     450        results["Prediction (training)"].Value = trainingList.AsReadOnly();
     451
     452
     453        var models = new VariableCollection();
     454
     455        foreach(var tup in targetVars.Zip(trees, Tuple.Create)) {
     456          var targetVarName = tup.Item1;
     457          var tree = tup.Item2;
     458
     459          var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
     460          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
     461          models.Add(origTreeVar);
     462        }
     463        results["Models"].Value = models;
     464      } else {
     465        var optTheta = ((DoubleArray)bestIndividualAndQuality.Item1["OptTheta"]).ToArray(); // see evaluate
     466        var trainingPrediction = Integrate(
     467                                   trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
     468                                   problemData.Dataset,
     469                                   problemData.AllowedInputVariables.ToArray(),
     470                                   targetVars,
     471                                   latentVariables,
     472                                   TrainingEpisodes,
     473                                   nodeIdx,
     474                                   optTheta,
     475                                   NumericIntegrationSteps).ToArray();
     476        // only for actual target values
     477        var trainingRows = TrainingEpisodes.SelectMany(e => Enumerable.Range(e.Start, e.End - e.Start));
     478        for(int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
     479          var targetVar = targetVars[colIdx];
     480          var trainingDataTable = new DataTable(targetVar + " prediction (training)");
     481          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, trainingRows));
     482          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, trainingPrediction.Select(arr => arr[colIdx].Item1).ToArray());
     483          trainingDataTable.Rows.Add(actualValuesRow);
     484          trainingDataTable.Rows.Add(predictedValuesRow);
     485          trainingList.Add(trainingDataTable);
     486        }
     487        // TODO: DRY for training and test
     488        var testList = new ItemList<DataTable>();
     489        var testRows = ProblemData.TestIndices.ToArray();
     490        var testPrediction = Integrate(
     491         trees,  // we assume trees contain expressions for the change of each target variable over time y'(t)
     492         problemData.Dataset,
     493         problemData.AllowedInputVariables.ToArray(),
     494         targetVars,
     495         latentVariables,
     496         new IntRange[] { ProblemData.TestPartition },
     497         nodeIdx,
     498         optTheta,
     499         NumericIntegrationSteps).ToArray();
     500
     501        for(int colIdx = 0; colIdx < targetVars.Length; colIdx++) {
     502          var targetVar = targetVars[colIdx];
     503          var testDataTable = new DataTable(targetVar + " prediction (test)");
     504          var actualValuesRow = new DataRow(targetVar, "The values of " + targetVar, problemData.Dataset.GetDoubleValues(targetVar, testRows));
     505          var predictedValuesRow = new DataRow(targetVar + " pred.", "Predicted values for " + targetVar, testPrediction.Select(arr => arr[colIdx].Item1).ToArray());
     506          testDataTable.Rows.Add(actualValuesRow);
     507          testDataTable.Rows.Add(predictedValuesRow);
     508          testList.Add(testDataTable);
     509        }
     510
     511        results["Prediction (training)"].Value = trainingList.AsReadOnly();
     512        results["Prediction (test)"].Value = testList.AsReadOnly();
     513        #region simplification of models
     514        // TODO the dependency of HeuristicLab.Problems.DataAnalysis.Symbolic is not ideal
     515        var models = new VariableCollection();    // to store target var names and original version of tree
     516
     517        foreach(var tup in targetVars.Zip(trees, Tuple.Create)) {
     518          var targetVarName = tup.Item1;
     519          var tree = tup.Item2;
     520
     521          // when we reference HeuristicLab.Problems.DataAnalysis.Symbolic we can translate symbols
     522          int nextParIdx = 0;
     523          var shownTree = new SymbolicExpressionTree(TranslateTreeNode(tree.Root, optTheta, ref nextParIdx));
     524
     525          // var shownTree = (SymbolicExpressionTree)tree.Clone();
     526          // var constantsNodeOrig = tree.IterateNodesPrefix().Where(IsConstantNode);
     527          // var constantsNodeShown = shownTree.IterateNodesPrefix().Where(IsConstantNode);
     528          //
     529          // foreach (var n in constantsNodeOrig.Zip(constantsNodeShown, (original, shown) => new { original, shown })) {
     530          //   double constantsVal = optTheta[nodeIdx[n.original]];
     531          //
     532          //   ConstantTreeNode replacementNode = new ConstantTreeNode(new Constant()) { Value = constantsVal };
     533          //
     534          //   var parentNode = n.shown.Parent;
     535          //   int replacementIndex = parentNode.IndexOfSubtree(n.shown);
     536          //   parentNode.RemoveSubtree(replacementIndex);
     537          //   parentNode.InsertSubtree(replacementIndex, replacementNode);
     538          // }
     539
     540          var origTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(original)");
     541          origTreeVar.Value = (ISymbolicExpressionTree)tree.Clone();
     542          models.Add(origTreeVar);
     543          var simplifiedTreeVar = new HeuristicLab.Core.Variable(targetVarName + "(simplified)");
     544          simplifiedTreeVar.Value = TreeSimplifier.Simplify(shownTree);
     545          models.Add(simplifiedTreeVar);
     546
     547        }
     548        results["Models"].Value = models;
     549        #endregion
     550      }
    477551    }
    478552
Note: See TracChangeset for help on using the changeset viewer.