source: branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionModel.cs @ 17991

Last change on this file since 17991 was 17991, checked in by gkronber, 6 months ago

#3128: first dump of exploratory work-in-progress code to make sure the working copy is not lost.

File size: 8.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27using HEAL.Attic;
28using HeuristicLab.Data;
29using System.Linq;
30
31namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
32  /// <summary>
33  /// Represents a symbolic regression model
34  /// </summary>
35  [StorableType("2739C33E-4DDB-4285-9DFB-C056D900B2F2")]
36  [Item(Name = "Symbolic Regression Model", Description = "Represents a symbolic regression model.")]
37  public class SymbolicRegressionModel : SymbolicDataAnalysisModel, ISymbolicRegressionModel {
38    [Storable]
39    private string targetVariable;
40    public string TargetVariable {
41      get { return targetVariable; }
42      set {
43        if (string.IsNullOrEmpty(value) || targetVariable == value) return;
44        targetVariable = value;
45        OnTargetVariableChanged(this, EventArgs.Empty);
46      }
47    }
48
49    [Storable]
50    private readonly double[,] parameterCovariance;
51    [Storable]
52    private readonly double sigma;
53
54    [StorableConstructor]
55    protected SymbolicRegressionModel(StorableConstructorFlag _) : base(_) {
56      targetVariable = string.Empty;
57    }
58
59    protected SymbolicRegressionModel(SymbolicRegressionModel original, Cloner cloner)
60      : base(original, cloner) {
61      this.targetVariable = original.targetVariable;
62      this.parameterCovariance = original.parameterCovariance; // immutable
63      this.sigma = original.sigma;
64    }
65
66    public SymbolicRegressionModel(string targetVariable, ISymbolicExpressionTree tree,
67      ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
68      double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, double[,] parameterCovariance = null, double sigma = 0.0)
69      : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit) {
70      this.targetVariable = targetVariable;
71      if (parameterCovariance != null)
72        this.parameterCovariance = (double[,])parameterCovariance.Clone();
73      this.sigma = sigma;
74    }
75
76    public override IDeepCloneable Clone(Cloner cloner) {
77      return new SymbolicRegressionModel(this, cloner);
78    }
79
80    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
81      return Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows)
82        .LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
83    }
84
85    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
86      // must work with a copy because we change tree nodes
87      var treeCopy = (ISymbolicExpressionTree)SymbolicExpressionTree.Clone();
88      // uses sampling to produce prediction intervals
89      alglib.hqrndseed(31415, 926535, out var state);
90      var cov = parameterCovariance;
91      if (cov == null || cov.Length == 0) return rows.Select(_ => 0.0);
92      var n = 30;
93      var M = rows.Select(_ => new double[n]).ToArray();
94      var paramNodes = new List<ISymbolicExpressionTreeNode>();
95      var coeffList = new List<double>();
96      // HACK: skip linear scaling parameters because the analyzer doesn't use them (and they are likely correlated with the remaining parameters)
97      // only works with linear scaling
98      if (!(treeCopy.Root.GetSubtree(0).GetSubtree(0).Symbol is Addition) ||
99          !(treeCopy.Root.GetSubtree(0).GetSubtree(0).GetSubtree(0).Symbol is Multiplication))
100        throw new NotImplementedException("prediction intervals are implemented only for linear scaling");
101
102      foreach (var node in treeCopy.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix()) {
103        if (node is ConstantTreeNode constNode) {
104          paramNodes.Add(constNode);
105          coeffList.Add(constNode.Value);
106        } else if (node is VariableTreeNode varNode) {
107          paramNodes.Add(varNode);
108          coeffList.Add(varNode.Weight);
109        }
110      }
111      var coeff = coeffList.ToArray();
112      var numParams = coeff.Length;
113      if (cov.GetLength(0) != numParams) throw new InvalidProgramException();
114
115      // TODO: probably we do not need to sample but can instead use a first-order or second-order approximation of f
116      // see http://sia.webpopix.org/nonlinearRegression.html
117      // also see https://rmazing.wordpress.com/2013/08/26/predictnls-part-2-taylor-approximation-confidence-intervals-for-nls-models/
118      // https://www.rdocumentation.org/packages/propagate/versions/1.0-4/topics/predictNLS
119      double[] p = new double[numParams];
120      for (int i = 0; i < 30; i++) {
121        // sample and update parameter vector delta is
122        alglib.hqrndnormalv(state, numParams, out var delta);
123        alglib.rmatrixmv(numParams, numParams, cov, 0, 0, 0, delta, 0, ref p, 0);
124        for (int j = 0; j < numParams; j++) {
125          if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j] + p[j];
126          else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j] + p[j];
127        }
128        var r = 0;
129        var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(treeCopy, dataset, rows).LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
130
131        foreach (var pred in estimatedValues) {
132          M[r++][i] = pred;
133        }
134      }
135
136      // reset parameters
137      for (int j = 0; j < numParams; j++) {
138        if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j];
139        else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j];
140      }
141      var sigma2 = sigma * sigma;
142      return M.Select(M_i => M_i.Variance() + sigma2).ToArray();
143    }
144
145    public ISymbolicRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
146      return new SymbolicRegressionSolution(this, new RegressionProblemData(problemData));
147    }
148    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
149      return CreateRegressionSolution(problemData);
150    }
151
152    public void Scale(IRegressionProblemData problemData) {
153      Scale(problemData, problemData.TargetVariable);
154    }
155
156    public virtual bool IsProblemDataCompatible(IRegressionProblemData problemData, out string errorMessage) {
157      return RegressionModel.IsProblemDataCompatible(this, problemData, out errorMessage);
158    }
159
160    public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
161      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
162      var regressionProblemData = problemData as IRegressionProblemData;
163      if (regressionProblemData == null)
164        throw new ArgumentException("The problem data is not compatible with this symbolic regression model. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
165      return IsProblemDataCompatible(regressionProblemData, out errorMessage);
166    }
167
168    #region events
169    public event EventHandler TargetVariableChanged;
170    private void OnTargetVariableChanged(object sender, EventArgs args) {
171      var changed = TargetVariableChanged;
172      if (changed != null)
173        changed(sender, args);
174    }
175    #endregion
176  }
177}
Note: See TracBrowser for help on using the repository browser.