Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer.cs @ 17991

Last change on this file since 17991 was 17991, checked in by gkronber, 3 years ago

#3128: first dump of exploratory work-in-progress code to make sure the working copy is not lost.

File size: 7.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using HeuristicLab.Common;
23using HeuristicLab.Core;
24using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
25using HeuristicLab.Parameters;
26using HEAL.Attic;
27using HeuristicLab.Data;
28using System.Collections.Generic;
29using System;
30using System.Linq;
31using HeuristicLab.Analysis.Statistics;
32
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
  /// <summary>
  /// An operator that analyzes the training best symbolic regression solution for single objective symbolic regression problems.
  /// </summary>
  /// <remarks>
  /// The solution created by this analyzer wraps a <see cref="SymbolicRegressionModel"/> that is constructed with
  /// a parameter covariance matrix and a sigma value calculated on the training rows
  /// (exploratory work-in-progress code for prediction intervals, ticket #3128).
  /// </remarks>
  [Item("SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer", "An operator that analyzes the training best symbolic regression solution for single objective symbolic regression problems.")]
  [StorableType("85786F8E-F84D-4909-9A66-620668B0C7FB")]
  public sealed class SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer : SymbolicDataAnalysisSingleObjectiveTrainingBestSolutionAnalyzer<ISymbolicRegressionSolution>,
  ISymbolicDataAnalysisInterpreterOperator, ISymbolicDataAnalysisBoundedOperator {
    // Names under which the parameters are registered in the Parameters collection.
    private const string ProblemDataParameterName = "ProblemData";
    private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicDataAnalysisTreeInterpreter";
    private const string EstimationLimitsParameterName = "EstimationLimits";

    #region parameter properties
    /// <summary>Lookup parameter for the regression problem data.</summary>
    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    /// <summary>Lookup parameter for the interpreter used to evaluate symbolic expression trees.</summary>
    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> SymbolicDataAnalysisTreeInterpreterParameter {
      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicDataAnalysisTreeInterpreterParameterName]; }
    }
    /// <summary>Value-lookup parameter for the lower and upper limit of the estimated values.</summary>
    public IValueLookupParameter<DoubleLimit> EstimationLimitsParameter {
      get { return (IValueLookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
    }
    #endregion

    [StorableConstructor]
    private SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer(StorableConstructorFlag _) : base(_) { }
    private SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer(SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates a new analyzer and registers the problem data, interpreter and estimation limits parameters.
    /// </summary>
    public SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName, "The problem data for the symbolic regression solution."));
      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicDataAnalysisTreeInterpreterParameterName, "The symbolic data analysis tree interpreter for the symbolic expression tree."));
      Parameters.Add(new ValueLookupParameter<DoubleLimit>(EstimationLimitsParameterName, "The lower and upper limit for the estimated values produced by the symbolic regression model."));
    }

    /// <summary>Creates a clone of this analyzer using the given cloner.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer(this, cloner);
    }

    /// <summary>
    /// Creates the regression solution for the best tree found on the training partition.
    /// </summary>
    /// <param name="bestTree">The best tree; it is cloned and never modified by this method.</param>
    /// <param name="bestQuality">The quality of <paramref name="bestTree"/>; not used by this implementation.</param>
    /// <returns>
    /// A solution whose model is built from the (optionally linearly scaled) tree together with the
    /// parameter covariance matrix and sigma calculated by <see cref="CalculateParameterCovariance"/>,
    /// and a clone of the problem data.
    /// </returns>
    protected override ISymbolicRegressionSolution CreateSolution(ISymbolicExpressionTree bestTree, double bestQuality) {
      // resolve the parameters once instead of repeating the lookups for every use
      var problemData = ProblemDataParameter.ActualValue;
      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
      var estimationLimits = EstimationLimitsParameter.ActualValue;

      // HACK: create model first for scaling, then calculate statistics and create a new model with prediction intervals
      var model = new SymbolicRegressionModel(problemData.TargetVariable,
        (ISymbolicExpressionTree)bestTree.Clone(),
        interpreter,
        estimationLimits.Lower,
        estimationLimits.Upper);
      if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(problemData);

      // use scaled tree
      CalculateParameterCovariance(model.SymbolicExpressionTree, problemData, interpreter, out var cov, out var sigma);
      var predIntervalModel = new SymbolicRegressionModel(problemData.TargetVariable,
        (ISymbolicExpressionTree)model.SymbolicExpressionTree.Clone(),
        interpreter,
        estimationLimits.Lower,
        estimationLimits.Upper, parameterCovariance: cov, sigma: sigma);

      return new SymbolicRegressionSolution(predIntervalModel, (IRegressionProblemData)problemData.Clone());
    }

    /// <summary>
    /// Calculates the covariance matrix of the tree's numeric parameters and the sigma value
    /// from the training rows of <paramref name="problemData"/>.
    /// </summary>
    /// <remarks>
    /// The parameters are the values of all constant nodes and the weights of all variable nodes,
    /// collected in postfix order. For every training row the gradient of the model output with respect
    /// to these parameters is evaluated; the resulting matrix, the parameter vector and the training residuals
    /// are handed to <c>Statistics.CalculateLinearModelStatistics</c>.
    /// </remarks>
    /// <param name="tree">The tree whose parameters are analyzed.</param>
    /// <param name="problemData">Provides the dataset, the training rows and the target values.</param>
    /// <param name="interpreter">The interpreter used to calculate the model output for the residuals.</param>
    /// <param name="cov">Receives the covariance matrix reported by the statistics calculation.</param>
    /// <param name="sigma">Receives the sigma value reported by the statistics calculation.</param>
    /// <exception cref="NotSupportedException">The tree cannot be converted to an automatically differentiable term.</exception>
    /// <exception cref="InvalidProgramException">
    /// The parameters reported by the converter do not match the parameters collected from the tree.
    /// </exception>
    private void CalculateParameterCovariance(ISymbolicExpressionTree tree, IRegressionProblemData problemData, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, out double[,] cov, out double sigma) {
      var ds = problemData.Dataset;
      // materialize once; the rows are needed for the predictions and for the gradient matrix
      var rows = problemData.TrainingIndices.ToArray();

      var y_pred = interpreter.GetSymbolicExpressionTreeValues(tree, ds, rows).ToArray();
      var residuals = problemData.TargetVariableTrainingValues.Zip(y_pred, (yi, y_pred_i) => yi - y_pred_i).ToArray();

      // collect the current parameter values (constants and variable weights) in postfix order
      var coeffList = new List<double>();
      foreach (var node in tree.IterateNodesPostfix()) {
        if (node is ConstantTreeNode constNode) {
          coeffList.Add(constNode.Value);
        } else if (node is VariableTreeNode varNode) {
          coeffList.Add(varNode.Weight);
        }
      }
      var coeff = coeffList.ToArray();
      var numParams = coeff.Length;

      // a failed conversion must be reported here; otherwise it only shows up later as an unrelated null error
      if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, makeVariableWeightsVariable: true, addLinearScalingTerms: false,
        out var parameters, out var initialConstants, out _, out var func_grad)) {
        throw new NotSupportedException("The symbolic expression tree cannot be converted to an automatically differentiable term; parameter covariance cannot be calculated.");
      }

      // The gradient is evaluated at coeff, so the converter must report exactly the same parameters in the same order.
      // Zip alone would silently ignore a different number of parameters, therefore the lengths are compared explicitly.
      var initial = initialConstants.ToArray();
      if (initial.Length != numParams)
        throw new InvalidProgramException($"The converter reported {initial.Length} parameters but {numParams} were collected from the tree.");
      for (int i = 0; i < numParams; i++) {
        if (initial[i] != coeff[i])
          throw new InvalidProgramException($"Parameter {i} reported by the converter ({initial[i]}) differs from the value collected from the tree ({coeff[i]}).");
      }

      var dcoeff = new double[rows.Length, numParams];
      var x_r = new double[parameters.Count];
      for (int r = 0; r < rows.Length; r++) {
        // copy row
        // NOTE(review): only the variable name of each parameter entry is used here; if the entries carry
        // additional information (e.g. time lags or categorical values) it is ignored - confirm this is intended.
        for (int c = 0; c < parameters.Count; c++) {
          x_r[c] = ds.GetDoubleValue(parameters[c].variableName, rows[r]);
        }
        // Item1 is stored as the gradient with respect to the parameters for this row
        var tup = func_grad(coeff, x_r);
        for (int c = 0; c < numParams; c++) {
          dcoeff[r, c] = tup.Item1[c];
        }
      }

      var stats = Statistics.CalculateLinearModelStatistics(dcoeff, coeff, residuals);
      cov = stats.CovMx;
      sigma = stats.sigma;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.